From 61c2c6764e6fcc95507dcf672d15f33b0af300f2 Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Wed, 6 Jul 2022 16:59:32 -0700 Subject: selftests/vm: modularize collapse selftests Modularize the collapse action of khugepaged collapse selftests by introducing a struct collapse_context which specifies how to collapse a given memory range and the expected semantics of the collapse. This can be reused later to test other collapse contexts. Additionally, all tests have logic that checks if a collapse occurred via reading /proc/self/smaps, and report if this is different than expected. Move this logic into the per-context ->collapse() hook instead of repeating it in every test. Link: https://lkml.kernel.org/r/20220706235936.2197195-15-zokeefe@google.com Signed-off-by: Zach O'Keefe Cc: Alex Shi Cc: Andrea Arcangeli Cc: Arnd Bergmann Cc: Axel Rasmussen Cc: Chris Kennelly Cc: Chris Zankel Cc: David Hildenbrand Cc: David Rientjes Cc: Helge Deller Cc: Hugh Dickins Cc: Ivan Kokshaysky Cc: James Bottomley Cc: Jens Axboe Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Matt Turner Cc: Max Filippov Cc: Miaohe Lin Cc: Michal Hocko Cc: Minchan Kim Cc: Pasha Tatashin Cc: Pavel Begunkov Cc: Peter Xu Cc: Rongwei Wang Cc: SeongJae Park Cc: Song Liu Cc: Thomas Bogendoerfer Cc: Vlastimil Babka Cc: Yang Shi Cc: Zi Yan Cc: Dan Carpenter Cc: "Souptick Joarder (HPE)" Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/khugepaged.c | 251 ++++++++++++++------------------ 1 file changed, 110 insertions(+), 141 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c index 155120b67a16..0f1bee0eff24 100644 --- a/tools/testing/selftests/vm/khugepaged.c +++ b/tools/testing/selftests/vm/khugepaged.c @@ -23,6 +23,11 @@ static int hpage_pmd_nr; #define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/" #define PID_SMAPS "/proc/self/smaps" +struct collapse_context { + void (*collapse)(const char *msg, char *p, bool expect); + bool enforce_pte_scan_limits; +}; + enum thp_enabled { THP_ALWAYS, THP_MADVISE, @@ -501,6 +506,21 @@ static bool wait_for_scan(const char *msg, char *p) return timeout == -1; } +static void khugepaged_collapse(const char *msg, char *p, bool expect) +{ + if (wait_for_scan(msg, p)) { + if (expect) + fail("Timeout"); + else + success("OK"); + return; + } else if (check_huge(p) == expect) { + success("OK"); + } else { + fail("Fail"); + } +} + static void alloc_at_fault(void) { struct settings settings = default_settings; @@ -528,53 +548,39 @@ static void alloc_at_fault(void) munmap(p, hpage_pmd_size); } -static void collapse_full(void) +static void collapse_full(struct collapse_context *c) { void *p; p = alloc_mapping(); fill_memory(p, 0, hpage_pmd_size); - if (wait_for_scan("Collapse fully populated PTE table", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + c->collapse("Collapse fully populated PTE table", p, true); validate_memory(p, 0, hpage_pmd_size); munmap(p, hpage_pmd_size); } -static void collapse_empty(void) +static void collapse_empty(struct collapse_context *c) { void *p; p = alloc_mapping(); - if (wait_for_scan("Do not collapse empty PTE table", p)) - fail("Timeout"); - else if (check_huge(p)) - fail("Fail"); - else - success("OK"); + c->collapse("Do not collapse empty PTE table", p, false); munmap(p, hpage_pmd_size); } -static void collapse_single_pte_entry(void) +static void collapse_single_pte_entry(struct collapse_context *c) { void *p; p = alloc_mapping(); fill_memory(p, 0, page_size); - if (wait_for_scan("Collapse PTE table with single PTE entry present", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + c->collapse("Collapse PTE table with single PTE entry present", p, + true); validate_memory(p, 0, page_size); munmap(p, hpage_pmd_size); } -static void collapse_max_ptes_none(void) +static void collapse_max_ptes_none(struct collapse_context *c) { int max_ptes_none = hpage_pmd_nr / 2; struct settings settings = default_settings; @@ -586,28 +592,22 @@ static void collapse_max_ptes_none(void) p = alloc_mapping(); fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); - if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p)) - fail("Timeout"); - else if (check_huge(p)) - fail("Fail"); - else - success("OK"); + c->collapse("Maybe collapse with max_ptes_none exceeded", p, + !c->enforce_pte_scan_limits); validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); - fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); - if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); + if (c->enforce_pte_scan_limits) { + fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); + c->collapse("Collapse with max_ptes_none PTEs empty", p, true); + validate_memory(p, 0, + (hpage_pmd_nr - max_ptes_none) * page_size); + } munmap(p, hpage_pmd_size); write_settings(&default_settings); } -static void collapse_swapin_single_pte(void) +static void collapse_swapin_single_pte(struct collapse_context *c) { void *p; p = alloc_mapping(); @@ -625,18 +625,13 @@ static void collapse_swapin_single_pte(void) goto out; } - if (wait_for_scan("Collapse with swapping in single PTE entry", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + c->collapse("Collapse with swapping in single PTE entry", p, true); validate_memory(p, 0, hpage_pmd_size); out: munmap(p, hpage_pmd_size); } -static void collapse_max_ptes_swap(void) +static void collapse_max_ptes_swap(struct collapse_context *c) { int max_ptes_swap = read_num("khugepaged/max_ptes_swap"); void *p; @@ -656,39 +651,34 @@ static void collapse_max_ptes_swap(void) goto out; } - if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p)) - fail("Timeout"); - else if (check_huge(p)) - fail("Fail"); - else - success("OK"); + c->collapse("Maybe collapse with max_ptes_swap exceeded", p, + !c->enforce_pte_scan_limits); validate_memory(p, 0, hpage_pmd_size); - fill_memory(p, 0, hpage_pmd_size); - printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr); - if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) { - perror("madvise(MADV_PAGEOUT)"); - exit(EXIT_FAILURE); - } - if (check_swap(p, max_ptes_swap * page_size)) { - success("OK"); - } else { - fail("Fail"); - goto out; - } + if (c->enforce_pte_scan_limits) { + fill_memory(p, 0, hpage_pmd_size); + printf("Swapout %d of %d pages...", max_ptes_swap, + hpage_pmd_nr); + if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) { + perror("madvise(MADV_PAGEOUT)"); + exit(EXIT_FAILURE); + } + if (check_swap(p, max_ptes_swap * page_size)) { + success("OK"); + } else { + fail("Fail"); + goto out; + } - if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); - validate_memory(p, 0, hpage_pmd_size); + c->collapse("Collapse with max_ptes_swap pages swapped out", p, + true); + validate_memory(p, 0, hpage_pmd_size); + } out: munmap(p, hpage_pmd_size); } -static void collapse_single_pte_entry_compound(void) +static void collapse_single_pte_entry_compound(struct collapse_context *c) { void *p; @@ -710,17 +700,13 @@ static void collapse_single_pte_entry_compound(void) else fail("Fail"); - if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + c->collapse("Collapse PTE table with single PTE mapping compound page", + p, true); validate_memory(p, 0, page_size); munmap(p, hpage_pmd_size); } -static void collapse_full_of_compound(void) +static void collapse_full_of_compound(struct collapse_context *c) { void *p; @@ -742,17 +728,12 @@ static void collapse_full_of_compound(void) else fail("Fail"); - if (wait_for_scan("Collapse PTE table full of compound pages", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + c->collapse("Collapse PTE table full of compound pages", p, true); validate_memory(p, 0, hpage_pmd_size); munmap(p, hpage_pmd_size); } -static void collapse_compound_extreme(void) +static void collapse_compound_extreme(struct collapse_context *c) { void *p; int i; @@ -798,18 +779,14 @@ static void collapse_compound_extreme(void) else fail("Fail"); - if (wait_for_scan("Collapse PTE table full of different compound pages", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + c->collapse("Collapse PTE table full of different compound pages", p, + true); validate_memory(p, 0, hpage_pmd_size); munmap(p, hpage_pmd_size); } -static void collapse_fork(void) +static void collapse_fork(struct collapse_context *c) { int wstatus; void *p; @@ -835,13 +812,8 @@ static void collapse_fork(void) fail("Fail"); fill_memory(p, page_size, 2 * page_size); - - if (wait_for_scan("Collapse PTE table with single page shared with parent process", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + c->collapse("Collapse PTE table with single page shared with parent process", + p, true); validate_memory(p, 0, page_size); munmap(p, hpage_pmd_size); @@ -860,7 +832,7 @@ static void collapse_fork(void) munmap(p, hpage_pmd_size); } -static void collapse_fork_compound(void) +static void collapse_fork_compound(struct collapse_context *c) { int wstatus; void *p; @@ -896,14 +868,10 @@ static void collapse_fork_compound(void) fill_memory(p, 0, page_size); write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); - if (wait_for_scan("Collapse PTE table full of compound pages in child", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + c->collapse("Collapse PTE table full of compound pages in child", + p, true); write_num("khugepaged/max_ptes_shared", - default_settings.khugepaged.max_ptes_shared); + default_settings.khugepaged.max_ptes_shared); validate_memory(p, 0, hpage_pmd_size); munmap(p, hpage_pmd_size); @@ -922,7 +890,7 @@ static void collapse_fork_compound(void) munmap(p, hpage_pmd_size); } -static void collapse_max_ptes_shared() +static void collapse_max_ptes_shared(struct collapse_context *c) { int max_ptes_shared = read_num("khugepaged/max_ptes_shared"); int wstatus; @@ -957,28 +925,22 @@ static void collapse_max_ptes_shared() else fail("Fail"); - if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p)) - fail("Timeout"); - else if (!check_huge(p)) - success("OK"); - else - fail("Fail"); - - printf("Trigger CoW on page %d of %d...", - hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr); - fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size); - if (!check_huge(p)) - success("OK"); - else - fail("Fail"); - - - if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p)) - fail("Timeout"); - else if (check_huge(p)) - success("OK"); - else - fail("Fail"); + c->collapse("Maybe collapse with max_ptes_shared exceeded", p, + !c->enforce_pte_scan_limits); + + if (c->enforce_pte_scan_limits) { + printf("Trigger CoW on page %d of %d...", + hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr); + fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * + page_size); + if (!check_huge(p)) + success("OK"); + else + fail("Fail"); + + c->collapse("Collapse with max_ptes_shared PTEs shared", + p, true); + } validate_memory(p, 0, hpage_pmd_size); munmap(p, hpage_pmd_size); @@ -999,6 +961,8 @@ static void collapse_max_ptes_shared() int main(void) { + struct collapse_context c; + setbuf(stdout, NULL); page_size = getpagesize(); @@ -1014,18 +978,23 @@ int main(void) adjust_settings(); alloc_at_fault(); - collapse_full(); - collapse_empty(); - collapse_single_pte_entry(); - collapse_max_ptes_none(); - collapse_swapin_single_pte(); - collapse_max_ptes_swap(); - collapse_single_pte_entry_compound(); - collapse_full_of_compound(); - collapse_compound_extreme(); - collapse_fork(); - collapse_fork_compound(); - collapse_max_ptes_shared(); + + printf("\n*** Testing context: khugepaged ***\n"); + c.collapse = &khugepaged_collapse; + c.enforce_pte_scan_limits = true; + + collapse_full(&c); + collapse_empty(&c); + collapse_single_pte_entry(&c); + collapse_max_ptes_none(&c); + collapse_swapin_single_pte(&c); + collapse_max_ptes_swap(&c); + collapse_single_pte_entry_compound(&c); + collapse_full_of_compound(&c); + collapse_compound_extreme(&c); + collapse_fork(&c); + collapse_fork_compound(&c); + collapse_max_ptes_shared(&c); restore_settings(0); } -- cgit v1.2.3 From be6667b0db97e10b2a6d57a906c2c8fd2b985e5e Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Wed, 6 Jul 2022 16:59:33 -0700 Subject: selftests/vm: dedup hugepage allocation logic The code p = alloc_mapping(); printf("Allocate huge page..."); madvise(p, hpage_pmd_size, MADV_HUGEPAGE); fill_memory(p, 0, hpage_pmd_size); if (check_huge(p)) success("OK"); else fail("Fail"); Is repeated many times in different tests. Add a helper, alloc_hpage() to handle this. Link: https://lkml.kernel.org/r/20220706235936.2197195-16-zokeefe@google.com Signed-off-by: Zach O'Keefe Cc: Alex Shi Cc: Andrea Arcangeli Cc: Arnd Bergmann Cc: Axel Rasmussen Cc: Chris Kennelly Cc: Chris Zankel Cc: David Hildenbrand Cc: David Rientjes Cc: Helge Deller Cc: Hugh Dickins Cc: Ivan Kokshaysky Cc: James Bottomley Cc: Jens Axboe Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Matt Turner Cc: Max Filippov Cc: Miaohe Lin Cc: Michal Hocko Cc: Minchan Kim Cc: Pasha Tatashin Cc: Pavel Begunkov Cc: Peter Xu Cc: Rongwei Wang Cc: SeongJae Park Cc: Song Liu Cc: Thomas Bogendoerfer Cc: Vlastimil Babka Cc: Yang Shi Cc: Zi Yan Cc: Dan Carpenter Cc: "Souptick Joarder (HPE)" Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/khugepaged.c | 62 ++++++++++++--------------------- 1 file changed, 23 insertions(+), 39 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c index 0f1bee0eff24..eb6f5bbacff1 100644 --- a/tools/testing/selftests/vm/khugepaged.c +++ b/tools/testing/selftests/vm/khugepaged.c @@ -461,6 +461,25 @@ static void fill_memory(int *p, unsigned long start, unsigned long end) p[i * page_size / sizeof(*p)] = i + 0xdead0000; } +/* + * Returns pmd-mapped hugepage in VMA marked VM_HUGEPAGE, filled with + * validate_memory()'able contents. + */ +static void *alloc_hpage(void) +{ + void *p; + + p = alloc_mapping(); + printf("Allocate huge page..."); + madvise(p, hpage_pmd_size, MADV_HUGEPAGE); + fill_memory(p, 0, hpage_pmd_size); + if (check_huge(p)) + success("OK"); + else + fail("Fail"); + return p; +} + static void validate_memory(int *p, unsigned long start, unsigned long end) { int i; @@ -682,15 +701,7 @@ static void collapse_single_pte_entry_compound(struct collapse_context *c) { void *p; - p = alloc_mapping(); - - printf("Allocate huge page..."); - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p)) - success("OK"); - else - fail("Fail"); + p = alloc_hpage(); madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); printf("Split huge page leaving single PTE mapping compound page..."); @@ -710,16 +721,7 @@ static void collapse_full_of_compound(struct collapse_context *c) { void *p; - p = alloc_mapping(); - - printf("Allocate huge page..."); - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - + p = alloc_hpage(); printf("Split huge page leaving single PTE page table full of compound pages..."); madvise(p, page_size, MADV_NOHUGEPAGE); madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); @@ -837,16 +839,7 @@ static void collapse_fork_compound(struct collapse_context *c) int wstatus; void *p; - p = alloc_mapping(); - - printf("Allocate huge page..."); - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - + p = alloc_hpage(); printf("Share huge page over fork()..."); if (!fork()) { /* Do not touch settings on child exit */ @@ -896,16 +889,7 @@ static void collapse_max_ptes_shared(struct collapse_context *c) int wstatus; void *p; - p = alloc_mapping(); - - printf("Allocate huge page..."); - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p)) - success("OK"); - else - fail("Fail"); - + p = alloc_hpage(); printf("Share huge page over fork()..."); if (!fork()) { /* Do not touch settings on child exit */ -- cgit v1.2.3 From 9330694de59f17e110a58be486a2c64d32bab42f Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Wed, 6 Jul 2022 16:59:34 -0700 Subject: selftests/vm: add MADV_COLLAPSE collapse context to selftests Add madvise collapse context to hugepage collapse selftests. This context is tested with /sys/kernel/mm/transparent_hugepage/enabled set to "never" in order to avoid unwanted interaction with khugepaged during testing. Also, refactor updates to sysfs THP settings using a stack so that the THP settings from nested callers can be restored. Link: https://lkml.kernel.org/r/20220706235936.2197195-17-zokeefe@google.com Signed-off-by: Zach O'Keefe Cc: Alex Shi Cc: Andrea Arcangeli Cc: Arnd Bergmann Cc: Axel Rasmussen Cc: Chris Kennelly Cc: Chris Zankel Cc: David Hildenbrand Cc: David Rientjes Cc: Helge Deller Cc: Hugh Dickins Cc: Ivan Kokshaysky Cc: James Bottomley Cc: Jens Axboe Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Matt Turner Cc: Max Filippov Cc: Miaohe Lin Cc: Michal Hocko Cc: Minchan Kim Cc: Pasha Tatashin Cc: Pavel Begunkov Cc: Peter Xu Cc: Rongwei Wang Cc: SeongJae Park Cc: Song Liu Cc: Thomas Bogendoerfer Cc: Vlastimil Babka Cc: Yang Shi Cc: Zi Yan Cc: Dan Carpenter Cc: "Souptick Joarder (HPE)" Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/khugepaged.c | 171 +++++++++++++++++++++++--------- 1 file changed, 125 insertions(+), 46 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c index eb6f5bbacff1..780f04440e15 100644 --- a/tools/testing/selftests/vm/khugepaged.c +++ b/tools/testing/selftests/vm/khugepaged.c @@ -14,6 +14,9 @@ #ifndef MADV_PAGEOUT #define MADV_PAGEOUT 21 #endif +#ifndef MADV_COLLAPSE +#define MADV_COLLAPSE 25 +#endif #define BASE_ADDR ((void *)(1UL << 30)) static unsigned long hpage_pmd_size; @@ -95,18 +98,6 @@ struct settings { struct khugepaged_settings khugepaged; }; -static struct settings default_settings = { - .thp_enabled = THP_MADVISE, - .thp_defrag = THP_DEFRAG_ALWAYS, - .shmem_enabled = SHMEM_NEVER, - .use_zero_page = 0, - .khugepaged = { - .defrag = 1, - .alloc_sleep_millisecs = 10, - .scan_sleep_millisecs = 10, - }, -}; - static struct settings saved_settings; static bool skip_settings_restore; @@ -284,6 +275,39 @@ static void write_settings(struct settings *settings) write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); } +#define MAX_SETTINGS_DEPTH 4 +static struct settings settings_stack[MAX_SETTINGS_DEPTH]; +static int settings_index; + +static struct settings *current_settings(void) +{ + if (!settings_index) { + printf("Fail: No settings set"); + exit(EXIT_FAILURE); + } + return settings_stack + settings_index - 1; +} + +static void push_settings(struct settings *settings) +{ + if (settings_index >= MAX_SETTINGS_DEPTH) { + printf("Fail: Settings stack exceeded"); + exit(EXIT_FAILURE); + } + settings_stack[settings_index++] = *settings; + write_settings(current_settings()); +} + +static void pop_settings(void) +{ + if (settings_index <= 0) { + printf("Fail: Settings stack empty"); + exit(EXIT_FAILURE); + } + --settings_index; + write_settings(current_settings()); +} + static void restore_settings(int sig) { if (skip_settings_restore) @@ -327,14 +351,6 @@ static void save_settings(void) signal(SIGQUIT, restore_settings); } -static void adjust_settings(void) -{ - - printf("Adjust settings..."); - write_settings(&default_settings); - success("OK"); -} - #define MAX_LINE_LENGTH 500 static bool check_for_pattern(FILE *fp, char *pattern, char *buf) @@ -493,6 +509,38 @@ static void validate_memory(int *p, unsigned long start, unsigned long end) } } +static void madvise_collapse(const char *msg, char *p, bool expect) +{ + int ret; + struct settings settings = *current_settings(); + + printf("%s...", msg); + /* Sanity check */ + if (check_huge(p)) { + printf("Unexpected huge page\n"); + exit(EXIT_FAILURE); + } + + /* + * Prevent khugepaged interference and tests that MADV_COLLAPSE + * ignores /sys/kernel/mm/transparent_hugepage/enabled + */ + settings.thp_enabled = THP_NEVER; + push_settings(&settings); + + /* Clear VM_NOHUGEPAGE */ + madvise(p, hpage_pmd_size, MADV_HUGEPAGE); + ret = madvise(p, hpage_pmd_size, MADV_COLLAPSE); + if (((bool)ret) == expect) + fail("Fail: Bad return value"); + else if (check_huge(p) != expect) + fail("Fail: check_huge()"); + else + success("OK"); + + pop_settings(); +} + #define TICK 500000 static bool wait_for_scan(const char *msg, char *p) { @@ -542,11 +590,11 @@ static void khugepaged_collapse(const char *msg, char *p, bool expect) static void alloc_at_fault(void) { - struct settings settings = default_settings; + struct settings settings = *current_settings(); char *p; settings.thp_enabled = THP_ALWAYS; - write_settings(&settings); + push_settings(&settings); p = alloc_mapping(); *p = 1; @@ -556,7 +604,7 @@ static void alloc_at_fault(void) else fail("Fail"); - write_settings(&default_settings); + pop_settings(); madvise(p, page_size, MADV_DONTNEED); printf("Split huge PMD on MADV_DONTNEED..."); @@ -602,11 +650,11 @@ static void collapse_single_pte_entry(struct collapse_context *c) static void collapse_max_ptes_none(struct collapse_context *c) { int max_ptes_none = hpage_pmd_nr / 2; - struct settings settings = default_settings; + struct settings settings = *current_settings(); void *p; settings.khugepaged.max_ptes_none = max_ptes_none; - write_settings(&settings); + push_settings(&settings); p = alloc_mapping(); @@ -623,7 +671,7 @@ static void collapse_max_ptes_none(struct collapse_context *c) } munmap(p, hpage_pmd_size); - write_settings(&default_settings); + pop_settings(); } static void collapse_swapin_single_pte(struct collapse_context *c) @@ -703,7 +751,6 @@ static void collapse_single_pte_entry_compound(struct collapse_context *c) p = alloc_hpage(); madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - printf("Split huge page leaving single PTE mapping compound page..."); madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED); if (!check_huge(p)) @@ -864,7 +911,7 @@ static void collapse_fork_compound(struct collapse_context *c) c->collapse("Collapse PTE table full of compound pages in child", p, true); write_num("khugepaged/max_ptes_shared", - default_settings.khugepaged.max_ptes_shared); + current_settings()->khugepaged.max_ptes_shared); validate_memory(p, 0, hpage_pmd_size); munmap(p, hpage_pmd_size); @@ -943,9 +990,21 @@ static void collapse_max_ptes_shared(struct collapse_context *c) munmap(p, hpage_pmd_size); } -int main(void) +int main(int argc, const char **argv) { struct collapse_context c; + struct settings default_settings = { + .thp_enabled = THP_MADVISE, + .thp_defrag = THP_DEFRAG_ALWAYS, + .shmem_enabled = SHMEM_NEVER, + .use_zero_page = 0, + .khugepaged = { + .defrag = 1, + .alloc_sleep_millisecs = 10, + .scan_sleep_millisecs = 10, + }, + }; + const char *tests = argc == 1 ? "all" : argv[1]; setbuf(stdout, NULL); @@ -959,26 +1018,46 @@ int main(void) default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; save_settings(); - adjust_settings(); + push_settings(&default_settings); alloc_at_fault(); - printf("\n*** Testing context: khugepaged ***\n"); - c.collapse = &khugepaged_collapse; - c.enforce_pte_scan_limits = true; - - collapse_full(&c); - collapse_empty(&c); - collapse_single_pte_entry(&c); - collapse_max_ptes_none(&c); - collapse_swapin_single_pte(&c); - collapse_max_ptes_swap(&c); - collapse_single_pte_entry_compound(&c); - collapse_full_of_compound(&c); - collapse_compound_extreme(&c); - collapse_fork(&c); - collapse_fork_compound(&c); - collapse_max_ptes_shared(&c); + if (!strcmp(tests, "khugepaged") || !strcmp(tests, "all")) { + printf("\n*** Testing context: khugepaged ***\n"); + c.collapse = &khugepaged_collapse; + c.enforce_pte_scan_limits = true; + + collapse_full(&c); + collapse_empty(&c); + collapse_single_pte_entry(&c); + collapse_max_ptes_none(&c); + collapse_swapin_single_pte(&c); + collapse_max_ptes_swap(&c); + collapse_single_pte_entry_compound(&c); + collapse_full_of_compound(&c); + collapse_compound_extreme(&c); + collapse_fork(&c); + collapse_fork_compound(&c); + collapse_max_ptes_shared(&c); + } + if (!strcmp(tests, "madvise") || !strcmp(tests, "all")) { + printf("\n*** Testing context: madvise ***\n"); + c.collapse = &madvise_collapse; + c.enforce_pte_scan_limits = false; + + collapse_full(&c); + collapse_empty(&c); + collapse_single_pte_entry(&c); + collapse_max_ptes_none(&c); + collapse_swapin_single_pte(&c); + collapse_max_ptes_swap(&c); + collapse_single_pte_entry_compound(&c); + collapse_full_of_compound(&c); + collapse_compound_extreme(&c); + collapse_fork(&c); + collapse_fork_compound(&c); + collapse_max_ptes_shared(&c); + } restore_settings(0); } -- cgit v1.2.3 From 1370a21fe4708b3bfa748d355146e4fbadf52f90 Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Wed, 6 Jul 2022 16:59:35 -0700 Subject: selftests/vm: add selftest to verify recollapse of THPs Add selftest specific to madvise collapse context that tests MADV_COLLAPSE is "successful" if a hugepage-aligned/sized region is already pmd-mapped. This test also verifies that MADV_COLLAPSE can collapse memory into THPs even in "madvise" THP mode and the memory isn't marked VM_HUGEPAGE. Link: https://lkml.kernel.org/r/20220706235936.2197195-18-zokeefe@google.com Signed-off-by: Zach O'Keefe Cc: Alex Shi Cc: Andrea Arcangeli Cc: Arnd Bergmann Cc: Axel Rasmussen Cc: Chris Kennelly Cc: Chris Zankel Cc: David Hildenbrand Cc: David Rientjes Cc: Helge Deller Cc: Hugh Dickins Cc: Ivan Kokshaysky Cc: James Bottomley Cc: Jens Axboe Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Matt Turner Cc: Max Filippov Cc: Miaohe Lin Cc: Michal Hocko Cc: Minchan Kim Cc: Pasha Tatashin Cc: Pavel Begunkov Cc: Peter Xu Cc: Rongwei Wang Cc: SeongJae Park Cc: Song Liu Cc: Thomas Bogendoerfer Cc: Vlastimil Babka Cc: Yang Shi Cc: Zi Yan Cc: Dan Carpenter Cc: "Souptick Joarder (HPE)" Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/khugepaged.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c index 780f04440e15..87cd0b99477f 100644 --- a/tools/testing/selftests/vm/khugepaged.c +++ b/tools/testing/selftests/vm/khugepaged.c @@ -990,6 +990,36 @@ static void collapse_max_ptes_shared(struct collapse_context *c) munmap(p, hpage_pmd_size); } +static void madvise_collapse_existing_thps(void) +{ + void *p; + int err; + + p = alloc_mapping(); + fill_memory(p, 0, hpage_pmd_size); + + printf("Collapse fully populated PTE table..."); + /* + * Note that we don't set MADV_HUGEPAGE here, which + * also tests that VM_HUGEPAGE isn't required for + * MADV_COLLAPSE in "madvise" mode. + */ + err = madvise(p, hpage_pmd_size, MADV_COLLAPSE); + if (err == 0 && check_huge(p)) { + success("OK"); + printf("Re-collapse PMD-mapped hugepage"); + err = madvise(p, hpage_pmd_size, MADV_COLLAPSE); + if (err == 0 && check_huge(p)) + success("OK"); + else + fail("Fail"); + } else { + fail("Fail"); + } + validate_memory(p, 0, hpage_pmd_size); + munmap(p, hpage_pmd_size); +} + int main(int argc, const char **argv) { struct collapse_context c; @@ -1057,6 +1087,7 @@ int main(int argc, const char **argv) collapse_fork(&c); collapse_fork_compound(&c); collapse_max_ptes_shared(&c); + madvise_collapse_existing_thps(); } restore_settings(0); -- cgit v1.2.3 From 9d0d946840075e0268f4f77fe39ba0f53e84c7c4 Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Wed, 6 Jul 2022 16:59:36 -0700 Subject: selftests/vm: add selftest to verify multi THP collapse Add support to allocate and verify collapse of multiple hugepage-sized regions into multiple THPs. Add "nr" argument to check_huge() that instructs check_huge() to check for exactly "nr_hpages" THPs. This has the added benefit of now being able to check for exactly 0 THPs, and so callsites that previously checked the negation of exactly 1 THP are now more correct. ->collapse struct collapse_context hook has been expanded with a "nr_hpages" argument to collapse "nr_hpages" hugepages. The collapse_full() test has been repurposed to collapse 4 THPs at once. It is expected more tests will want to test multi THP collapse (e.g. file/shmem). This is of particular benefit to madvise collapse context given that it may do many THP collapses during a single syscall. Link: https://lkml.kernel.org/r/20220706235936.2197195-19-zokeefe@google.com Signed-off-by: Zach O'Keefe Cc: Alex Shi Cc: Andrea Arcangeli Cc: Arnd Bergmann Cc: Axel Rasmussen Cc: Chris Kennelly Cc: Chris Zankel Cc: David Hildenbrand Cc: David Rientjes Cc: Helge Deller Cc: Hugh Dickins Cc: Ivan Kokshaysky Cc: James Bottomley Cc: Jens Axboe Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Matt Turner Cc: Max Filippov Cc: Miaohe Lin Cc: Michal Hocko Cc: Minchan Kim Cc: Pasha Tatashin Cc: Pavel Begunkov Cc: Peter Xu Cc: Rongwei Wang Cc: SeongJae Park Cc: Song Liu Cc: Thomas Bogendoerfer Cc: Vlastimil Babka Cc: Yang Shi Cc: Zi Yan Cc: Dan Carpenter Cc: "Souptick Joarder (HPE)" Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/khugepaged.c | 140 +++++++++++++++++--------------- 1 file changed, 73 insertions(+), 67 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c index 87cd0b99477f..b77b1e28cdb3 100644 --- a/tools/testing/selftests/vm/khugepaged.c +++ b/tools/testing/selftests/vm/khugepaged.c @@ -27,7 +27,7 @@ static int hpage_pmd_nr; #define PID_SMAPS "/proc/self/smaps" struct collapse_context { - void (*collapse)(const char *msg, char *p, bool expect); + void (*collapse)(const char *msg, char *p, int nr_hpages, bool expect); bool enforce_pte_scan_limits; }; @@ -362,7 +362,7 @@ static bool check_for_pattern(FILE *fp, char *pattern, char *buf) return false; } -static bool check_huge(void *addr) +static bool check_huge(void *addr, int nr_hpages) { bool thp = false; int ret; @@ -387,7 +387,7 @@ static bool check_huge(void *addr) goto err_out; ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB", - hpage_pmd_size >> 10); + nr_hpages * (hpage_pmd_size >> 10)); if (ret >= MAX_LINE_LENGTH) { printf("%s: Pattern is too long\n", __func__); exit(EXIT_FAILURE); @@ -455,12 +455,12 @@ err_out: return swap; } -static void *alloc_mapping(void) +static void *alloc_mapping(int nr) { void *p; - p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + p = mmap(BASE_ADDR, nr * hpage_pmd_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (p != BASE_ADDR) { printf("Failed to allocate VMA at %p\n", BASE_ADDR); exit(EXIT_FAILURE); @@ -485,11 +485,11 @@ static void *alloc_hpage(void) { void *p; - p = alloc_mapping(); + p = alloc_mapping(1); printf("Allocate huge page..."); madvise(p, hpage_pmd_size, MADV_HUGEPAGE); fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p)) + if (check_huge(p, 1)) success("OK"); else fail("Fail"); @@ -509,14 +509,15 @@ static void validate_memory(int *p, unsigned long start, unsigned long end) } } -static void madvise_collapse(const char *msg, char *p, bool expect) +static void madvise_collapse(const char *msg, char *p, int nr_hpages, + bool expect) { int ret; struct settings settings = *current_settings(); printf("%s...", msg); /* Sanity check */ - if (check_huge(p)) { + if (!check_huge(p, 0)) { printf("Unexpected huge page\n"); exit(EXIT_FAILURE); } @@ -529,11 +530,11 @@ static void madvise_collapse(const char *msg, char *p, bool expect) push_settings(&settings); /* Clear VM_NOHUGEPAGE */ - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - ret = madvise(p, hpage_pmd_size, MADV_COLLAPSE); + madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); + ret = madvise(p, nr_hpages * hpage_pmd_size, MADV_COLLAPSE); if (((bool)ret) == expect) fail("Fail: Bad return value"); - else if (check_huge(p) != expect) + else if (check_huge(p, nr_hpages) != expect) fail("Fail: check_huge()"); else success("OK"); @@ -542,25 +543,25 @@ static void madvise_collapse(const char *msg, char *p, bool expect) } #define TICK 500000 -static bool wait_for_scan(const char *msg, char *p) +static bool wait_for_scan(const char *msg, char *p, int nr_hpages) { int full_scans; int timeout = 6; /* 3 seconds */ /* Sanity check */ - if (check_huge(p)) { + if (!check_huge(p, 0)) { printf("Unexpected huge page\n"); exit(EXIT_FAILURE); } - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); + madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); /* Wait until the second full_scan completed */ full_scans = read_num("khugepaged/full_scans") + 2; printf("%s...", msg); while (timeout--) { - if (check_huge(p)) + if (check_huge(p, nr_hpages)) break; if (read_num("khugepaged/full_scans") >= full_scans) break; @@ -568,20 +569,21 @@ static bool wait_for_scan(const char *msg, char *p) usleep(TICK); } - madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); + madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE); return timeout == -1; } -static void khugepaged_collapse(const char *msg, char *p, bool expect) +static void khugepaged_collapse(const char *msg, char *p, int nr_hpages, + bool expect) { - if (wait_for_scan(msg, p)) { + if (wait_for_scan(msg, p, nr_hpages)) { if (expect) fail("Timeout"); else success("OK"); return; - } else if (check_huge(p) == expect) { + } else if (check_huge(p, nr_hpages) == expect) { success("OK"); } else { fail("Fail"); @@ -596,10 +598,10 @@ static void alloc_at_fault(void) settings.thp_enabled = THP_ALWAYS; push_settings(&settings); - p = alloc_mapping(); + p = alloc_mapping(1); *p = 1; printf("Allocate huge page on fault..."); - if (check_huge(p)) + if (check_huge(p, 1)) success("OK"); else fail("Fail"); @@ -608,7 +610,7 @@ static void alloc_at_fault(void) madvise(p, page_size, MADV_DONTNEED); printf("Split huge PMD on MADV_DONTNEED..."); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); @@ -618,20 +620,23 @@ static void alloc_at_fault(void) static void collapse_full(struct collapse_context *c) { void *p; + int nr_hpages = 4; + unsigned long size = nr_hpages * hpage_pmd_size; - p = alloc_mapping(); - fill_memory(p, 0, hpage_pmd_size); - c->collapse("Collapse fully populated PTE table", p, true); - validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + p = alloc_mapping(nr_hpages); + fill_memory(p, 0, size); + c->collapse("Collapse multiple fully populated PTE table", p, nr_hpages, + true); + validate_memory(p, 0, size); + munmap(p, size); } static void collapse_empty(struct collapse_context *c) { void *p; - p = alloc_mapping(); - c->collapse("Do not collapse empty PTE table", p, false); + p = alloc_mapping(1); + c->collapse("Do not collapse empty PTE table", p, 1, false); munmap(p, hpage_pmd_size); } @@ -639,10 +644,10 @@ static void collapse_single_pte_entry(struct collapse_context *c) { void *p; - p = alloc_mapping(); + p = alloc_mapping(1); fill_memory(p, 0, page_size); c->collapse("Collapse PTE table with single PTE entry present", p, - true); + 1, true); validate_memory(p, 0, page_size); munmap(p, hpage_pmd_size); } @@ -656,16 +661,17 @@ static void collapse_max_ptes_none(struct collapse_context *c) settings.khugepaged.max_ptes_none = max_ptes_none; push_settings(&settings); - p = alloc_mapping(); + p = alloc_mapping(1); fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); - c->collapse("Maybe collapse with max_ptes_none exceeded", p, + c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1, !c->enforce_pte_scan_limits); validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); if (c->enforce_pte_scan_limits) { fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); - c->collapse("Collapse with max_ptes_none PTEs empty", p, true); + c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, + true); validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); } @@ -677,7 +683,7 @@ static void collapse_max_ptes_none(struct collapse_context *c) static void collapse_swapin_single_pte(struct collapse_context *c) { void *p; - p = alloc_mapping(); + p = alloc_mapping(1); fill_memory(p, 0, hpage_pmd_size); printf("Swapout one page..."); @@ -692,7 +698,7 @@ static void collapse_swapin_single_pte(struct collapse_context *c) goto out; } - c->collapse("Collapse with swapping in single PTE entry", p, true); + c->collapse("Collapse with swapping in single PTE entry", p, 1, true); validate_memory(p, 0, hpage_pmd_size); out: munmap(p, hpage_pmd_size); @@ -703,7 +709,7 @@ static void collapse_max_ptes_swap(struct collapse_context *c) int max_ptes_swap = read_num("khugepaged/max_ptes_swap"); void *p; - p = alloc_mapping(); + p = alloc_mapping(1); fill_memory(p, 0, hpage_pmd_size); printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr); @@ -718,7 +724,7 @@ static void collapse_max_ptes_swap(struct collapse_context *c) goto out; } - c->collapse("Maybe collapse with max_ptes_swap exceeded", p, + c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1, !c->enforce_pte_scan_limits); validate_memory(p, 0, hpage_pmd_size); @@ -738,7 +744,7 @@ static void collapse_max_ptes_swap(struct collapse_context *c) } c->collapse("Collapse with max_ptes_swap pages swapped out", p, - true); + 1, true); validate_memory(p, 0, hpage_pmd_size); } out: @@ -753,13 +759,13 @@ static void collapse_single_pte_entry_compound(struct collapse_context *c) madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); printf("Split huge page leaving single PTE mapping compound page..."); madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); c->collapse("Collapse PTE table with single PTE mapping compound page", - p, true); + p, 1, true); validate_memory(p, 0, page_size); munmap(p, hpage_pmd_size); } @@ -772,12 +778,12 @@ static void collapse_full_of_compound(struct collapse_context *c) printf("Split huge page leaving single PTE page table full of compound pages..."); madvise(p, page_size, MADV_NOHUGEPAGE); madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); - c->collapse("Collapse PTE table full of compound pages", p, true); + c->collapse("Collapse PTE table full of compound pages", p, 1, true); validate_memory(p, 0, hpage_pmd_size); munmap(p, hpage_pmd_size); } @@ -787,14 +793,14 @@ static void collapse_compound_extreme(struct collapse_context *c) void *p; int i; - p = alloc_mapping(); + p = alloc_mapping(1); for (i = 0; i < hpage_pmd_nr; i++) { printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...", i + 1, hpage_pmd_nr); madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE); fill_memory(BASE_ADDR, 0, hpage_pmd_size); - if (!check_huge(BASE_ADDR)) { + if (!check_huge(BASE_ADDR, 1)) { printf("Failed to allocate huge page\n"); exit(EXIT_FAILURE); } @@ -823,12 +829,12 @@ static void collapse_compound_extreme(struct collapse_context *c) munmap(BASE_ADDR, hpage_pmd_size); fill_memory(p, 0, hpage_pmd_size); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); - c->collapse("Collapse PTE table full of different compound pages", p, + c->collapse("Collapse PTE table full of different compound pages", p, 1, true); validate_memory(p, 0, hpage_pmd_size); @@ -840,11 +846,11 @@ static void collapse_fork(struct collapse_context *c) int wstatus; void *p; - p = alloc_mapping(); + p = alloc_mapping(1); printf("Allocate small page..."); fill_memory(p, 0, page_size); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); @@ -855,14 +861,14 @@ static void collapse_fork(struct collapse_context *c) skip_settings_restore = true; exit_status = 0; - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); fill_memory(p, page_size, 2 * page_size); c->collapse("Collapse PTE table with single page shared with parent process", - p, true); + p, 1, true); validate_memory(p, 0, page_size); munmap(p, hpage_pmd_size); @@ -873,7 +879,7 @@ static void collapse_fork(struct collapse_context *c) exit_status += WEXITSTATUS(wstatus); printf("Check if parent still has small page..."); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); @@ -893,7 +899,7 @@ static void collapse_fork_compound(struct collapse_context *c) skip_settings_restore = true; exit_status = 0; - if (check_huge(p)) + if (check_huge(p, 1)) success("OK"); else fail("Fail"); @@ -901,7 +907,7 @@ static void collapse_fork_compound(struct collapse_context *c) printf("Split huge page PMD in child process..."); madvise(p, page_size, MADV_NOHUGEPAGE); madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); @@ -909,7 +915,7 @@ static void collapse_fork_compound(struct collapse_context *c) write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); c->collapse("Collapse PTE table full of compound pages in child", - p, true); + p, 1, true); write_num("khugepaged/max_ptes_shared", current_settings()->khugepaged.max_ptes_shared); @@ -922,7 +928,7 @@ static void collapse_fork_compound(struct collapse_context *c) exit_status += WEXITSTATUS(wstatus); printf("Check if parent still has huge page..."); - if (check_huge(p)) + if (check_huge(p, 1)) success("OK"); else fail("Fail"); @@ -943,7 +949,7 @@ static void collapse_max_ptes_shared(struct collapse_context *c) skip_settings_restore = true; exit_status = 0; - if (check_huge(p)) + if (check_huge(p, 1)) success("OK"); else fail("Fail"); @@ -951,26 +957,26 @@ static void collapse_max_ptes_shared(struct collapse_context *c) printf("Trigger CoW on page %d of %d...", hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr); fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); c->collapse("Maybe collapse with max_ptes_shared exceeded", p, - !c->enforce_pte_scan_limits); + 1, !c->enforce_pte_scan_limits); if (c->enforce_pte_scan_limits) { printf("Trigger CoW on page %d of %d...", hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr); fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size); - if (!check_huge(p)) + if (check_huge(p, 0)) success("OK"); else fail("Fail"); c->collapse("Collapse with max_ptes_shared PTEs shared", - p, true); + p, 1, true); } validate_memory(p, 0, hpage_pmd_size); @@ -982,7 +988,7 @@ static void collapse_max_ptes_shared(struct collapse_context *c) exit_status += WEXITSTATUS(wstatus); printf("Check if parent still has huge page..."); - if (check_huge(p)) + if (check_huge(p, 1)) success("OK"); else fail("Fail"); @@ -995,7 +1001,7 @@ static void madvise_collapse_existing_thps(void) void *p; int err; - p = alloc_mapping(); + p = alloc_mapping(1); fill_memory(p, 0, hpage_pmd_size); printf("Collapse fully populated PTE table..."); @@ -1005,11 +1011,11 @@ static void madvise_collapse_existing_thps(void) * MADV_COLLAPSE in "madvise" mode. */ err = madvise(p, hpage_pmd_size, MADV_COLLAPSE); - if (err == 0 && check_huge(p)) { + if (err == 0 && check_huge(p, 1)) { success("OK"); printf("Re-collapse PMD-mapped hugepage"); err = madvise(p, hpage_pmd_size, MADV_COLLAPSE); - if (err == 0 && check_huge(p)) + if (err == 0 && check_huge(p, 1)) success("OK"); else fail("Fail"); -- cgit v1.2.3 From a722d70508d64e4800dbf7e9fbf132d186a6484a Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Mon, 8 Aug 2022 10:56:10 -0700 Subject: selftests: vm: add hugetlb_shared userfaultfd test to run_vmtests.sh Patch series "userfaultfd: add /dev/userfaultfd for fine grained access control", v7. Why not ...? ============ - Why not /proc/[pid]/userfaultfd? Two main points (additional discussion [1]): - /proc/[pid]/* files are all owned by the user/group of the process, and they don't really support chmod/chown. So, without extending procfs it doesn't solve the problem this series is trying to solve. - The main argument *for* this was to support creating UFFDs for remote processes. But, that use case clearly calls for CAP_SYS_PTRACE, so to support this we could just use the UFFD syscall as-is. - Why not use a syscall? Access to syscalls is generally controlled by capabilities. We don't have a capability which is used for userfaultfd access without also granting more / other permissions as well, and adding a new capability was rejected [2]. - It's possible a LSM could be used to control access instead, but I have some concerns. I don't think this approach would be as easy to use, particularly if we were to try to solve this with something heavyweight like SELinux. Maybe we could pursue adding a new LSM specifically for this user case, but it may be too narrow of a case to justify that. [1]: https://patchwork.kernel.org/project/linux-mm/cover/20220719195628.3415852-1-axelrasmussen@google.com/ [2]: https://lore.kernel.org/lkml/686276b9-4530-2045-6bd8-170e5943abe4@schaufler-ca.com/T/ This patch (of 5): This not being included was just a simple oversight. There are certain features (like minor fault support) which are only enabled on shared mappings, so without including hugetlb_shared we actually lose a significant amount of test coverage. Link: https://lkml.kernel.org/r/20220808175614.3885028-1-axelrasmussen@google.com Link: https://lkml.kernel.org/r/20220808175614.3885028-2-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Reviewed-by: Shuah Khan Reviewed-by: Peter Xu Cc: Al Viro Cc: Dave Hansen Cc: Dmitry V. Levin Cc: Gleb Fotengauer-Malinovskiy Cc: Hugh Dickins Cc: Jan Kara Cc: Jonathan Corbet Cc: Mel Gorman Cc: Mike Kravetz Cc: Mike Rapoport Cc: Nadav Amit Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zhang Yi Cc: Mike Rapoport Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/run_vmtests.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index de86983b8a0f..b8e7f6f38d64 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -121,9 +121,11 @@ run_test ./gup_test -a run_test ./gup_test -ct -F 0x1 0 19 0x1000 run_test ./userfaultfd anon 20 16 -# Test requires source and destination huge pages. Size of source -# (half_ufd_size_MB) is passed as argument to test. +# Hugetlb tests require source and destination huge pages. Pass in half the +# size ($half_ufd_size_MB), which is used for *each*. run_test ./userfaultfd hugetlb "$half_ufd_size_MB" 32 +run_test ./userfaultfd hugetlb_shared "$half_ufd_size_MB" 32 "$mnt"/uffd-test +rm -f "$mnt"/uffd-test run_test ./userfaultfd shmem 20 16 #cleanup -- cgit v1.2.3 From 77c07f7cca9fc7c99aa26058e4674980e0cbb186 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Mon, 8 Aug 2022 10:56:12 -0700 Subject: userfaultfd: selftests: modify selftest to use /dev/userfaultfd We clearly want to ensure both userfaultfd(2) and /dev/userfaultfd keep working into the future, so just run the test twice, using each interface. Instead of always testing both userfaultfd(2) and /dev/userfaultfd, let the user choose which to test. As with other test features, change the behavior based on a new command line flag. Introduce the idea of "test mods", which are generic (not specific to a test type) modifications to the behavior of the test. This is sort of borrowed from this RFC patch series [1], but simplified a bit. The benefit is, in "typical" configurations this test is somewhat slow (say, 30sec or something). Testing both clearly doubles it, so it may not always be desirable, as users are likely to use one or the other, but never both, in the "real world". [1]: https://patchwork.kernel.org/project/linux-mm/patch/20201129004548.1619714-14-namit@vmware.com/ [axelrasmussen@google.com: modify selftest to exit with KSFT_SKIP *only* when features are unsupported, per Mike] Link: https://lkml.kernel.org/r/20220819205201.658693-4-axelrasmussen@google.com Link: https://lkml.kernel.org/r/20220808175614.3885028-4-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Acked-by: Peter Xu Acked-by: Mike Rapoport Cc: Al Viro Cc: Dave Hansen Cc: Dmitry V. Levin Cc: Gleb Fotengauer-Malinovskiy Cc: Hugh Dickins Cc: Jan Kara Cc: Jonathan Corbet Cc: Mel Gorman Cc: Mike Kravetz Cc: Shuah Khan Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zhang Yi Cc: Mike Rapoport Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/userfaultfd.c | 76 +++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 10 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 7c3f1b0ab468..7be709d9eed0 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -77,6 +77,11 @@ static int bounces; #define TEST_SHMEM 3 static int test_type; +#define UFFD_FLAGS (O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY) + +/* test using /dev/userfaultfd, instead of userfaultfd(2) */ +static bool test_dev_userfaultfd; + /* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */ #define ALARM_INTERVAL_SECS 10 static volatile bool test_uffdio_copy_eexist = true; @@ -125,6 +130,8 @@ struct uffd_stats { const char *examples = "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" "./userfaultfd anon 100 99999\n\n" + "# Run the same anonymous memory test, but using /dev/userfaultfd:\n" + "./userfaultfd anon:dev 100 99999\n\n" "# Run share memory test on 1GiB region with 99 bounces:\n" "./userfaultfd shmem 1000 99\n\n" "# Run hugetlb memory test on 256MiB region with 50 bounces:\n" @@ -141,6 +148,14 @@ static void usage(void) "[hugetlbfs_file]\n\n"); fprintf(stderr, "Supported : anon, hugetlb, " "hugetlb_shared, shmem\n\n"); + fprintf(stderr, "'Test mods' can be joined to the test type string with a ':'. " + "Supported mods:\n"); + fprintf(stderr, "\tsyscall - Use userfaultfd(2) (default)\n"); + fprintf(stderr, "\tdev - Use /dev/userfaultfd instead of userfaultfd(2)\n"); + fprintf(stderr, "\nExample test mod usage:\n"); + fprintf(stderr, "# Run anonymous memory test with /dev/userfaultfd:\n"); + fprintf(stderr, "./userfaultfd anon:dev 100 99999\n\n"); + fprintf(stderr, "Examples:\n\n"); fprintf(stderr, "%s", examples); exit(1); @@ -154,12 +169,14 @@ static void usage(void) ret, __LINE__); \ } while (0) -#define err(fmt, ...) \ +#define errexit(exitcode, fmt, ...) \ do { \ _err(fmt, ##__VA_ARGS__); \ - exit(1); \ + exit(exitcode); \ } while (0) +#define err(fmt, ...) errexit(1, fmt, ##__VA_ARGS__) + static void uffd_stats_reset(struct uffd_stats *uffd_stats, unsigned long n_cpus) { @@ -383,13 +400,34 @@ static void assert_expected_ioctls_present(uint64_t mode, uint64_t ioctls) } } +static int __userfaultfd_open_dev(void) +{ + int fd, _uffd; + + fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); + if (fd < 0) + errexit(KSFT_SKIP, "opening /dev/userfaultfd failed"); + + _uffd = ioctl(fd, USERFAULTFD_IOC_NEW, UFFD_FLAGS); + if (_uffd < 0) + errexit(errno == ENOTTY ? KSFT_SKIP : 1, + "creating userfaultfd failed"); + close(fd); + return _uffd; +} + static void userfaultfd_open(uint64_t *features) { struct uffdio_api uffdio_api; - uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY); - if (uffd < 0) - err("userfaultfd syscall not available in this kernel"); + if (test_dev_userfaultfd) + uffd = __userfaultfd_open_dev(); + else { + uffd = syscall(__NR_userfaultfd, UFFD_FLAGS); + if (uffd < 0) + errexit(errno == ENOSYS ? KSFT_SKIP : 1, + "creating userfaultfd failed"); + } uffd_flags = fcntl(uffd, F_GETFD, NULL); uffdio_api.api = UFFD_API; @@ -1584,8 +1622,6 @@ unsigned long default_huge_page_size(void) static void set_test_type(const char *type) { - uint64_t features = UFFD_API_FEATURES; - if (!strcmp(type, "anon")) { test_type = TEST_ANON; uffd_test_ops = &anon_uffd_test_ops; @@ -1603,9 +1639,29 @@ static void set_test_type(const char *type) test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; test_uffdio_minor = true; - } else { - err("Unknown test type: %s", type); } +} + +static void parse_test_type_arg(const char *raw_type) +{ + char *buf = strdup(raw_type); + uint64_t features = UFFD_API_FEATURES; + + while (buf) { + const char *token = strsep(&buf, ":"); + + if (!test_type) + set_test_type(token); + else if (!strcmp(token, "dev")) + test_dev_userfaultfd = true; + else if (!strcmp(token, "syscall")) + test_dev_userfaultfd = false; + else + err("unrecognized test mod '%s'", token); + } + + if (!test_type) + err("failed to parse test type argument: '%s'", raw_type); if (test_type == TEST_HUGETLB) page_size = default_huge_page_size(); @@ -1653,7 +1709,7 @@ int main(int argc, char **argv) err("failed to arm SIGALRM"); alarm(ALARM_INTERVAL_SECS); - set_test_type(argv[1]); + parse_test_type_arg(argv[1]); nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size / -- cgit v1.2.3 From 4a7e922587d2ea19278a6355dcb52043e47adbac Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Mon, 8 Aug 2022 10:56:14 -0700 Subject: selftests: vm: add /dev/userfaultfd test cases to run_vmtests.sh This new mode was recently added to the userfaultfd selftest. We want to exercise both userfaultfd(2) as well as /dev/userfaultfd, so add both test cases to the script. Link: https://lkml.kernel.org/r/20220808175614.3885028-6-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Reviewed-by: Shuah Khan Acked-by: Peter Xu Cc: Al Viro Cc: Dave Hansen Cc: Dmitry V. Levin Cc: Gleb Fotengauer-Malinovskiy Cc: Hugh Dickins Cc: Jan Kara Cc: Jonathan Corbet Cc: Mel Gorman Cc: Mike Kravetz Cc: Mike Rapoport Cc: Nadav Amit Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zhang Yi Cc: Mike Rapoport Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/run_vmtests.sh | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index b8e7f6f38d64..e780e76c26b8 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -120,13 +120,16 @@ run_test ./gup_test -a # Dump pages 0, 19, and 4096, using pin_user_pages: run_test ./gup_test -ct -F 0x1 0 19 0x1000 -run_test ./userfaultfd anon 20 16 -# Hugetlb tests require source and destination huge pages. Pass in half the -# size ($half_ufd_size_MB), which is used for *each*. -run_test ./userfaultfd hugetlb "$half_ufd_size_MB" 32 -run_test ./userfaultfd hugetlb_shared "$half_ufd_size_MB" 32 "$mnt"/uffd-test -rm -f "$mnt"/uffd-test -run_test ./userfaultfd shmem 20 16 +uffd_mods=("" ":dev") +for mod in "${uffd_mods[@]}"; do + run_test ./userfaultfd anon${mod} 20 16 + # Hugetlb tests require source and destination huge pages. Pass in half + # the size ($half_ufd_size_MB), which is used for *each*. + run_test ./userfaultfd hugetlb${mod} "$half_ufd_size_MB" 32 + run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32 "$mnt"/uffd-test + rm -f "$mnt"/uffd-test + run_test ./userfaultfd shmem${mod} 20 16 +done #cleanup umount "$mnt" -- cgit v1.2.3 From 6f83d6c74ea5a5b267be85206822da280cae110a Mon Sep 17 00:00:00 2001 From: Tarun Sahu Date: Mon, 1 Aug 2022 12:32:31 +0530 Subject: Kselftests: remove support of libhugetlbfs from kselftests libhugetlbfs, the user side utitlity to work with hugepages, does not have any active support. There are only 2 selftests which are part of in vm/hmm_test.c that depends on libhugetlbfs. This patch modifies the tests so that they will not require libhugetlb library. [axelrasmussen@google.com: : remove orphaned references to local_config.{h,mk}] Link: https://lkml.kernel.org/r/20220831211526.2743216-1-axelrasmussen@google.com Link: https://lkml.kernel.org/r/20220801070231.13831-1-tsahu@linux.ibm.com Signed-off-by: Tarun Sahu Signed-off-by: Axel Rasmussen Tested-by: Zach O'Keefe Cc: "Aneesh Kumar K.V" Cc: Jerome Glisse Cc: Shivaprasad G Bhat Cc: Shuah Khan Cc: Vaibhav Jain Cc: Axel Rasmussen Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/Makefile | 21 +----- tools/testing/selftests/vm/check_config.sh | 31 --------- tools/testing/selftests/vm/hmm-tests.c | 108 +++++++++++++++++++---------- 3 files changed, 74 insertions(+), 86 deletions(-) delete mode 100644 tools/testing/selftests/vm/check_config.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index d9fa6a9ea584..4ae879f70f4c 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,9 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm selftests -LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h - -include local_config.mk +LOCAL_HDRS += $(top_srcdir)/mm/gup_test.h uname_M := $(shell uname -m 2>/dev/null || echo not) MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/') @@ -152,23 +150,6 @@ endif $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap -# HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty. -$(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS) - $(OUTPUT)/ksm_tests: LDLIBS += -lnuma $(OUTPUT)/migration: LDLIBS += -lnuma - -local_config.mk local_config.h: check_config.sh - /bin/sh ./check_config.sh $(CC) - -EXTRA_CLEAN += local_config.mk local_config.h - -ifeq ($(HMM_EXTRA_LIBS),) -all: warn_missing_hugelibs - -warn_missing_hugelibs: - @echo ; \ - echo "Warning: missing libhugetlbfs support. Some HMM tests will be skipped." ; \ - echo -endif diff --git a/tools/testing/selftests/vm/check_config.sh b/tools/testing/selftests/vm/check_config.sh deleted file mode 100644 index 079c8a40b85d..000000000000 --- a/tools/testing/selftests/vm/check_config.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# -# Probe for libraries and create header files to record the results. Both C -# header files and Makefile include fragments are created. - -OUTPUT_H_FILE=local_config.h -OUTPUT_MKFILE=local_config.mk - -# libhugetlbfs -tmpname=$(mktemp) -tmpfile_c=${tmpname}.c -tmpfile_o=${tmpname}.o - -echo "#include " > $tmpfile_c -echo "#include " >> $tmpfile_c -echo "int func(void) { return 0; }" >> $tmpfile_c - -CC=${1:?"Usage: $0 # example compiler: gcc"} -$CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1 - -if [ -f $tmpfile_o ]; then - echo "#define LOCAL_CONFIG_HAVE_LIBHUGETLBFS 1" > $OUTPUT_H_FILE - echo "HMM_EXTRA_LIBS = -lhugetlbfs" > $OUTPUT_MKFILE -else - echo "// No libhugetlbfs support found" > $OUTPUT_H_FILE - echo "# No libhugetlbfs support found, so:" > $OUTPUT_MKFILE - echo "HMM_EXTRA_LIBS = " >> $OUTPUT_MKFILE -fi - -rm ${tmpname}.* diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 529f53b40296..f2c2c970eeb2 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -26,10 +26,6 @@ #include #include -#include "./local_config.h" -#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS -#include -#endif /* * This is a private UAPI to the kernel test module so it isn't exported @@ -733,7 +729,54 @@ TEST_F(hmm, anon_write_huge) hmm_buffer_free(buffer); } -#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS +/* + * Read numeric data from raw and tagged kernel status files. Used to read + * /proc and /sys data (without a tag) and from /proc/meminfo (with a tag). + */ +static long file_read_ulong(char *file, const char *tag) +{ + int fd; + char buf[2048]; + int len; + char *p, *q; + long val; + + fd = open(file, O_RDONLY); + if (fd < 0) { + /* Error opening the file */ + return -1; + } + + len = read(fd, buf, sizeof(buf)); + close(fd); + if (len < 0) { + /* Error in reading the file */ + return -1; + } + if (len == sizeof(buf)) { + /* Error file is too large */ + return -1; + } + buf[len] = '\0'; + + /* Search for a tag if provided */ + if (tag) { + p = strstr(buf, tag); + if (!p) + return -1; /* looks like the line we want isn't there */ + p += strlen(tag); + } else + p = buf; + + val = strtol(p, &q, 0); + if (*q != ' ') { + /* Error parsing the file */ + return -1; + } + + return val; +} + /* * Write huge TLBFS page. */ @@ -742,29 +785,27 @@ TEST_F(hmm, anon_write_hugetlbfs) struct hmm_buffer *buffer; unsigned long npages; unsigned long size; + unsigned long default_hsize; unsigned long i; int *ptr; int ret; - long pagesizes[4]; - int n, idx; - - /* Skip test if we can't allocate a hugetlbfs page. */ - n = gethugepagesizes(pagesizes, 4); - if (n <= 0) + default_hsize = file_read_ulong("/proc/meminfo", "Hugepagesize:"); + if (default_hsize < 0 || default_hsize*1024 < default_hsize) SKIP(return, "Huge page size could not be determined"); - for (idx = 0; --n > 0; ) { - if (pagesizes[n] < pagesizes[idx]) - idx = n; - } - size = ALIGN(TWOMEG, pagesizes[idx]); + default_hsize = default_hsize*1024; /* KB to B */ + + size = ALIGN(TWOMEG, default_hsize); npages = size >> self->page_shift; buffer = malloc(sizeof(*buffer)); ASSERT_NE(buffer, NULL); - buffer->ptr = get_hugepage_region(size, GHR_STRICT); - if (buffer->ptr == NULL) { + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, + -1, 0); + if (buffer->ptr == MAP_FAILED) { free(buffer); SKIP(return, "Huge page could not be allocated"); } @@ -788,11 +829,10 @@ TEST_F(hmm, anon_write_hugetlbfs) for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ASSERT_EQ(ptr[i], i); - free_hugepage_region(buffer->ptr); + munmap(buffer->ptr, buffer->size); buffer->ptr = NULL; hmm_buffer_free(buffer); } -#endif /* LOCAL_CONFIG_HAVE_LIBHUGETLBFS */ /* * Read mmap'ed file memory. @@ -1467,7 +1507,6 @@ TEST_F(hmm2, snapshot) hmm_buffer_free(buffer); } -#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS /* * Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that * should be mapped by a large page table entry. @@ -1477,30 +1516,30 @@ TEST_F(hmm, compound) struct hmm_buffer *buffer; unsigned long npages; unsigned long size; + unsigned long default_hsize; int *ptr; unsigned char *m; int ret; - long pagesizes[4]; - int n, idx; unsigned long i; /* Skip test if we can't allocate a hugetlbfs page. */ - n = gethugepagesizes(pagesizes, 4); - if (n <= 0) - return; - for (idx = 0; --n > 0; ) { - if (pagesizes[n] < pagesizes[idx]) - idx = n; - } - size = ALIGN(TWOMEG, pagesizes[idx]); + default_hsize = file_read_ulong("/proc/meminfo", "Hugepagesize:"); + if (default_hsize < 0 || default_hsize*1024 < default_hsize) + SKIP(return, "Huge page size could not be determined"); + default_hsize = default_hsize*1024; /* KB to B */ + + size = ALIGN(TWOMEG, default_hsize); npages = size >> self->page_shift; buffer = malloc(sizeof(*buffer)); ASSERT_NE(buffer, NULL); - buffer->ptr = get_hugepage_region(size, GHR_STRICT); - if (buffer->ptr == NULL) { + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, + -1, 0); + if (buffer->ptr == MAP_FAILED) { free(buffer); return; } @@ -1539,11 +1578,10 @@ TEST_F(hmm, compound) ASSERT_EQ(m[i], HMM_DMIRROR_PROT_READ | HMM_DMIRROR_PROT_PMD); - free_hugepage_region(buffer->ptr); + munmap(buffer->ptr, buffer->size); buffer->ptr = NULL; hmm_buffer_free(buffer); } -#endif /* LOCAL_CONFIG_HAVE_LIBHUGETLBFS */ /* * Test two devices reading the same memory (double mapped). -- cgit v1.2.3 From 50717ed380ed3d7a5ddfa33aad864e20e7b7b453 Mon Sep 17 00:00:00 2001 From: Tarun Sahu Date: Thu, 1 Sep 2022 14:53:15 +0530 Subject: selftest: vm: remove deleted local_config.* from .gitignore Commit d2d6cba5d6623 ("selftest: vm: remove orphaned references to local_config.{h,mk}") took care of removing orphaned references. This commit removes local_config from .gitignore. Parent patch commit 69007f156ba ("Kselftests: remove support of libhugetlbfs from kselftests") Link: https://lkml.kernel.org/r/20220901092315.33619-1-tsahu@linux.ibm.com Signed-off-by: Tarun Sahu Reviewed-by: Axel Rasmussen Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/.gitignore | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 31e5eea2a9b9..7b9dc2426f18 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -30,7 +30,6 @@ map_fixed_noreplace write_to_hugetlbfs hmm-tests memfd_secret -local_config.* soft-dirty split_huge_page_test ksm_tests -- cgit v1.2.3 From ca3d76b0aa808a06997297d123b66d17b81e5285 Mon Sep 17 00:00:00 2001 From: Jakub Matěna Date: Fri, 3 Jun 2022 16:57:19 +0200 Subject: mm: add merging after mremap resize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When mremap call results in expansion, it might be possible to merge the VMA with the next VMA which might become adjacent. This patch adds vma_merge call after the expansion is done to try and merge. [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20220603145719.1012094-3-matenajakub@gmail.com Signed-off-by: Jakub Matěna Reviewed-by: Vlastimil Babka Cc: Hugh Dickins Cc: "Kirill A . Shutemov" Cc: Liam Howlett Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Peter Zijlstra (Intel) Cc: Rik van Riel Cc: Steven Rostedt Signed-off-by: Andrew Morton --- mm/mremap.c | 19 +++++++++++-- tools/testing/selftests/vm/mremap_test.c | 49 +++++++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests') diff --git a/mm/mremap.c b/mm/mremap.c index 8644ff278f02..e465ffe279bb 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -23,6 +24,7 @@ #include #include #include +#include #include #include @@ -1012,6 +1014,9 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, /* can we just expand the current mapping? */ if (vma_expandable(vma, new_len - old_len)) { long pages = (new_len - old_len) >> PAGE_SHIFT; + unsigned long extension_start = addr + old_len; + unsigned long extension_end = addr + new_len; + pgoff_t extension_pgoff = vma->vm_pgoff + (old_len >> PAGE_SHIFT); if (vma->vm_flags & VM_ACCOUNT) { if (security_vm_enough_memory_mm(mm, pages)) { @@ -1020,8 +1025,18 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, } } - if (vma_adjust(vma, vma->vm_start, addr + new_len, - vma->vm_pgoff, NULL)) { + /* + * Function vma_merge() is called on the extension we are adding to + * the already existing vma, vma_merge() will merge this extension with + * the already existing vma (expand operation itself) and possibly also + * with the next vma if it becomes adjacent to the expanded vma and + * otherwise compatible. + */ + vma = vma_merge(mm, vma, extension_start, extension_end, + vma->vm_flags, vma->anon_vma, vma->vm_file, + extension_pgoff, vma_policy(vma), + vma->vm_userfaultfd_ctx, anon_vma_name(vma)); + if (!vma) { vm_unacct_memory(pages); ret = -ENOMEM; goto out; diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c index db0270127aeb..9496346973d4 100644 --- a/tools/testing/selftests/vm/mremap_test.c +++ b/tools/testing/selftests/vm/mremap_test.c @@ -118,6 +118,50 @@ static unsigned long long get_mmap_min_addr(void) return addr; } +/* + * This test validates that merge is called when expanding a mapping. + * Mapping containing three pages is created, middle page is unmapped + * and then the mapping containing the first page is expanded so that + * it fills the created hole. The two parts should merge creating + * single mapping with three pages. + */ +static void mremap_expand_merge(unsigned long page_size) +{ + char *test_name = "mremap expand merge"; + FILE *fp; + char *line = NULL; + size_t len = 0; + bool success = false; + char *start = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + munmap(start + page_size, page_size); + mremap(start, page_size, 2 * page_size, 0); + + fp = fopen("/proc/self/maps", "r"); + if (fp == NULL) { + ksft_test_result_fail("%s\n", test_name); + return; + } + + while (getline(&line, &len, fp) != -1) { + char *first = strtok(line, "- "); + void *first_val = (void *)strtol(first, NULL, 16); + char *second = strtok(NULL, "- "); + void *second_val = (void *) strtol(second, NULL, 16); + + if (first_val == start && second_val == start + 3 * page_size) { + success = true; + break; + } + } + if (success) + ksft_test_result_pass("%s\n", test_name); + else + ksft_test_result_fail("%s\n", test_name); + fclose(fp); +} + /* * Returns the start address of the mapping on success, else returns * NULL on failure. @@ -336,6 +380,7 @@ int main(int argc, char **argv) int i, run_perf_tests; unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD; unsigned int pattern_seed; + int num_expand_tests = 1; struct test test_cases[MAX_TEST]; struct test perf_test_cases[MAX_PERF_TEST]; int page_size; @@ -407,12 +452,14 @@ int main(int argc, char **argv) (threshold_mb * _1MB >= _1GB); ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ? - ARRAY_SIZE(perf_test_cases) : 0)); + ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests); for (i = 0; i < ARRAY_SIZE(test_cases); i++) run_mremap_test_case(test_cases[i], &failures, threshold_mb, pattern_seed); + mremap_expand_merge(page_size); + if (run_perf_tests) { ksft_print_msg("\n%s\n", "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:"); -- cgit v1.2.3 From 6a760f58c792b6f7411f886271bb03f697464433 Mon Sep 17 00:00:00 2001 From: Mika Penttilä Date: Fri, 26 Aug 2022 08:06:31 +0300 Subject: mm/hmm/test: use char dev with struct device to get device node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HMM selftests use an in-kernel pseudo device to emulate device memory. The pseudo device registers a major device range for two or four pseudo device instances. User space has a script that reads /proc/devices in order to find the assigned major number, and sends that to mknod(1), once for each node. Change this to properly use cdev and struct device APIs. Delete the /proc/devices parsing from the user-space test script, now that it is unnecessary. Also, delete an unused field in struct dmirror_device: devmem. Link: https://lkml.kernel.org/r/20220826050631.25771-1-mpenttil@redhat.com Signed-off-by: Mika Penttilä Reviewed-by: John Hubbard Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Cc: Alistair Popple Cc: Ralph Campbell Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/test_hmm.c | 13 ++++++++++--- tools/testing/selftests/vm/test_hmm.sh | 10 ---------- 2 files changed, 10 insertions(+), 13 deletions(-) (limited to 'tools/testing/selftests') diff --git a/lib/test_hmm.c b/lib/test_hmm.c index e3965cafd27c..6a33f6b1b465 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -107,8 +107,8 @@ struct dmirror_chunk { */ struct dmirror_device { struct cdev cdevice; - struct hmm_devmem *devmem; unsigned int zone_device_type; + struct device device; unsigned int devmem_capacity; unsigned int devmem_count; @@ -1390,7 +1390,14 @@ static int dmirror_device_init(struct dmirror_device *mdevice, int id) cdev_init(&mdevice->cdevice, &dmirror_fops); mdevice->cdevice.owner = THIS_MODULE; - ret = cdev_add(&mdevice->cdevice, dev, 1); + device_initialize(&mdevice->device); + mdevice->device.devt = dev; + + ret = dev_set_name(&mdevice->device, "hmm_dmirror%u", id); + if (ret) + return ret; + + ret = cdev_device_add(&mdevice->cdevice, &mdevice->device); if (ret) return ret; @@ -1416,7 +1423,7 @@ static void dmirror_device_remove(struct dmirror_device *mdevice) kfree(mdevice->devmem_chunks); } - cdev_del(&mdevice->cdevice); + cdev_device_del(&mdevice->cdevice, &mdevice->device); } static int __init hmm_dmirror_init(void) diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/vm/test_hmm.sh index 539c9371e592..46e19b5d648d 100755 --- a/tools/testing/selftests/vm/test_hmm.sh +++ b/tools/testing/selftests/vm/test_hmm.sh @@ -52,21 +52,11 @@ load_driver() usage fi fi - if [ $? == 0 ]; then - major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices) - mknod /dev/hmm_dmirror0 c $major 0 - mknod /dev/hmm_dmirror1 c $major 1 - if [ $# -eq 2 ]; then - mknod /dev/hmm_dmirror2 c $major 2 - mknod /dev/hmm_dmirror3 c $major 3 - fi - fi } unload_driver() { modprobe -r $DRIVER > /dev/null 2>&1 - rm -f /dev/hmm_dmirror? } run_smoke() -- cgit v1.2.3 From ade38b8ca5ceeeb72e8d01357f3dcde7c87570cc Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 9 Sep 2022 20:28:55 +0000 Subject: selftest/damon: add a test for duplicate context dirs creation Patch series "mm/damon: minor fixes and cleanups". This patchset contains minor fixes and cleanups for DAMON including - selftest for a bug we found before (Patch 1), - fix of region holes in vaddr corner case and a kunit test for it (Patches 2 and 3), and - documents/Kconfig updates for title wordsmithing (Patch 4) and more aggressive DAMON debugfs interface deprecation announcement (Patches 5-7). This patch (of 7): Commit d26f60703606 ("mm/damon/dbgfs: avoid duplicate context directory creation") fixes a bug which could result in memory leak and DAMON disablement. This commit adds a selftest for verifying the fix and avoid regression. Link: https://lkml.kernel.org/r/20220909202901.57977-1-sj@kernel.org Link: https://lkml.kernel.org/r/20220909202901.57977-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Cc: Jonathan Corbet Cc: Shuah Khan Cc: Yun Levi Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/Makefile | 1 + .../damon/debugfs_duplicate_context_creation.sh | 27 ++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 0470c5f3e690..a1fa2eff8192 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -6,6 +6,7 @@ TEST_GEN_FILES += huge_count_read_write TEST_FILES = _chk_dependency.sh _debugfs_common.sh TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh +TEST_PROGS += debugfs_duplicate_context_creation.sh TEST_PROGS += sysfs.sh include ../lib.mk diff --git a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh new file mode 100644 index 000000000000..4a76e37ef16b --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source _debugfs_common.sh + +# Test duplicated context creation +# ================================ + +if ! echo foo > "$DBGFS/mk_contexts" +then + echo "context creation failed" + exit 1 +fi + +if echo foo > "$DBGFS/mk_contexts" +then + echo "duplicate context creation success" + exit 1 +fi + +if ! echo foo > "$DBGFS/rm_contexts" +then + echo "context deletion failed" + exit 1 +fi + +exit 0 -- cgit v1.2.3 From 3505c8e62acfb62908ffd7d2d6c5971657596d1d Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Thu, 22 Sep 2022 11:46:51 -0700 Subject: selftests/vm: retry on EAGAIN for MADV_COLLAPSE selftest MADV_COLLAPSE is a best-effort request that will set errno to an actionable value if the request cannot be performed. For example, if pages are not found on the LRU, or if they are currently locked by something else, MADV_COLLAPSE will fail and set errno to EAGAIN to inform callers that they may try again. Since the khugepaged selftest is the first public use of MADV_COLLAPSE, set a best practice of checking errno and retrying on EAGAIN. Link: https://lkml.kernel.org/r/20220922184651.1016461-2-zokeefe@google.com Fixes: 9330694de59f ("selftests/vm: add MADV_COLLAPSE collapse context to selftests") Signed-off-by: Zach O'Keefe Cc: Axel Rasmussen Cc: Chris Kennelly Cc: David Hildenbrand Cc: David Rientjes Cc: Hugh Dickins Cc: James Houghton Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Minchan Kim Cc: Pasha Tatashin Cc: Peter Xu Cc: Rongwei Wang Cc: SeongJae Park Cc: Song Liu Cc: Vlastimil Babka Cc: Yang Shi Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/khugepaged.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c index b77b1e28cdb3..b55dc331af13 100644 --- a/tools/testing/selftests/vm/khugepaged.c +++ b/tools/testing/selftests/vm/khugepaged.c @@ -1,4 +1,5 @@ #define _GNU_SOURCE +#include #include #include #include @@ -477,6 +478,26 @@ static void fill_memory(int *p, unsigned long start, unsigned long end) p[i * page_size / sizeof(*p)] = i + 0xdead0000; } +/* + * MADV_COLLAPSE is a best-effort request and may fail if an internal + * resource is temporarily unavailable, in which case it will set errno to + * EAGAIN. In such a case, immediately reattempt the operation one more + * time. + */ +static int madvise_collapse_retry(void *p, unsigned long size) +{ + bool retry = true; + int ret; + +retry: + ret = madvise(p, size, MADV_COLLAPSE); + if (ret && errno == EAGAIN && retry) { + retry = false; + goto retry; + } + return ret; +} + /* * Returns pmd-mapped hugepage in VMA marked VM_HUGEPAGE, filled with * validate_memory()'able contents. @@ -531,7 +552,7 @@ static void madvise_collapse(const char *msg, char *p, int nr_hpages, /* Clear VM_NOHUGEPAGE */ madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); - ret = madvise(p, nr_hpages * hpage_pmd_size, MADV_COLLAPSE); + ret = madvise_collapse_retry(p, nr_hpages * hpage_pmd_size); if (((bool)ret) == expect) fail("Fail: Bad return value"); else if (check_huge(p, nr_hpages) != expect) -- cgit v1.2.3 From c07c343cda8ef02985ac6583a2e5af892726f734 Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Thu, 22 Sep 2022 15:40:41 -0700 Subject: selftests/vm: dedup THP helpers These files: tools/testing/selftests/vm/vm_util.c tools/testing/selftests/vm/khugepaged.c Both contain logic to: 1) Determine hugepage size on current system 2) Read /proc/self/smaps to determine number of THPs at an address Refactor selftests/vm/khugepaged.c to use the vm_util common helpers and add it as a build dependency. Since selftests/vm/khugepaged.c is the largest user of check_huge(), change the signature of check_huge() to match selftests/vm/khugepaged.c's useage: take an expected number of hugepages, and return a bool indicating if the correct number of hugepages were found. Add a wrapper, check_huge_anon(), in anticipation of checking smaps for file and shmem hugepages. Update existing callsites to use the new pattern / function. Likewise, check_for_pattern() was duplicated, and it's a general enough helper to include in vm_util helpers as well. Link: https://lkml.kernel.org/r/20220907144521.3115321-6-zokeefe@google.com Link: https://lkml.kernel.org/r/20220922224046.1143204-6-zokeefe@google.com Signed-off-by: Zach O'Keefe Reviewed-by: Zi Yan Cc: Axel Rasmussen Cc: Chris Kennelly Cc: David Hildenbrand Cc: David Rientjes Cc: Hugh Dickins Cc: James Houghton Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Minchan Kim Cc: Pasha Tatashin Cc: Peter Xu Cc: Rongwei Wang Cc: SeongJae Park Cc: Song Liu Cc: Vlastimil Babka Cc: Yang Shi Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/khugepaged.c | 64 +++-------------------- tools/testing/selftests/vm/soft-dirty.c | 2 +- tools/testing/selftests/vm/split_huge_page_test.c | 12 ++--- tools/testing/selftests/vm/vm_util.c | 26 +++++---- tools/testing/selftests/vm/vm_util.h | 3 +- 6 files changed, 32 insertions(+), 76 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 4ae879f70f4c..c9c0996c122b 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -95,6 +95,7 @@ TEST_FILES += va_128TBswitch.sh include ../lib.mk +$(OUTPUT)/khugepaged: vm_util.c $(OUTPUT)/madv_populate: vm_util.c $(OUTPUT)/soft-dirty: vm_util.c $(OUTPUT)/split_huge_page_test: vm_util.c diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c index b55dc331af13..235a64b4458c 100644 --- a/tools/testing/selftests/vm/khugepaged.c +++ b/tools/testing/selftests/vm/khugepaged.c @@ -12,6 +12,8 @@ #include #include +#include "vm_util.h" + #ifndef MADV_PAGEOUT #define MADV_PAGEOUT 21 #endif @@ -352,64 +354,12 @@ static void save_settings(void) signal(SIGQUIT, restore_settings); } -#define MAX_LINE_LENGTH 500 - -static bool check_for_pattern(FILE *fp, char *pattern, char *buf) -{ - while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) { - if (!strncmp(buf, pattern, strlen(pattern))) - return true; - } - return false; -} - static bool check_huge(void *addr, int nr_hpages) { - bool thp = false; - int ret; - FILE *fp; - char buffer[MAX_LINE_LENGTH]; - char addr_pattern[MAX_LINE_LENGTH]; - - ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", - (unsigned long) addr); - if (ret >= MAX_LINE_LENGTH) { - printf("%s: Pattern is too long\n", __func__); - exit(EXIT_FAILURE); - } - - - fp = fopen(PID_SMAPS, "r"); - if (!fp) { - printf("%s: Failed to open file %s\n", __func__, PID_SMAPS); - exit(EXIT_FAILURE); - } - if (!check_for_pattern(fp, addr_pattern, buffer)) - goto err_out; - - ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB", - nr_hpages * (hpage_pmd_size >> 10)); - if (ret >= MAX_LINE_LENGTH) { - printf("%s: Pattern is too long\n", __func__); - exit(EXIT_FAILURE); - } - /* - * Fetch the AnonHugePages: in the same block and check whether it got - * the expected number of hugeepages next. - */ - if (!check_for_pattern(fp, "AnonHugePages:", buffer)) - goto err_out; - - if (strncmp(buffer, addr_pattern, strlen(addr_pattern))) - goto err_out; - - thp = true; -err_out: - fclose(fp); - return thp; + return check_huge_anon(addr, nr_hpages, hpage_pmd_size); } - +#define MAX_LINE_LENGTH 500 static bool check_swap(void *addr, unsigned long size) { bool swap = false; @@ -431,7 +381,7 @@ static bool check_swap(void *addr, unsigned long size) printf("%s: Failed to open file %s\n", __func__, PID_SMAPS); exit(EXIT_FAILURE); } - if (!check_for_pattern(fp, addr_pattern, buffer)) + if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer))) goto err_out; ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB", @@ -444,7 +394,7 @@ static bool check_swap(void *addr, unsigned long size) * Fetch the Swap: in the same block and check whether it got * the expected number of hugeepages next. */ - if (!check_for_pattern(fp, "Swap:", buffer)) + if (!check_for_pattern(fp, "Swap:", buffer, sizeof(buffer))) goto err_out; if (strncmp(buffer, addr_pattern, strlen(addr_pattern))) @@ -1066,7 +1016,7 @@ int main(int argc, const char **argv) setbuf(stdout, NULL); page_size = getpagesize(); - hpage_pmd_size = read_num("hpage_pmd_size"); + hpage_pmd_size = read_pmd_pagesize(); hpage_pmd_nr = hpage_pmd_size / page_size; default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1; diff --git a/tools/testing/selftests/vm/soft-dirty.c b/tools/testing/selftests/vm/soft-dirty.c index e3a43f5d4fa2..21d8830c5f24 100644 --- a/tools/testing/selftests/vm/soft-dirty.c +++ b/tools/testing/selftests/vm/soft-dirty.c @@ -91,7 +91,7 @@ static void test_hugepage(int pagemap_fd, int pagesize) for (i = 0; i < hpage_len; i++) map[i] = (char)i; - if (check_huge(map)) { + if (check_huge_anon(map, 1, hpage_len)) { ksft_test_result_pass("Test %s huge page allocation\n", __func__); clear_softdirty(); diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c index 6aa2b8253aed..76e1c36dd9e5 100644 --- a/tools/testing/selftests/vm/split_huge_page_test.c +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -92,7 +92,6 @@ void split_pmd_thp(void) { char *one_page; size_t len = 4 * pmd_pagesize; - uint64_t thp_size; size_t i; one_page = memalign(pmd_pagesize, len); @@ -107,8 +106,7 @@ void split_pmd_thp(void) for (i = 0; i < len; i++) one_page[i] = (char)i; - thp_size = check_huge(one_page); - if (!thp_size) { + if (!check_huge_anon(one_page, 1, pmd_pagesize)) { printf("No THP is allocated\n"); exit(EXIT_FAILURE); } @@ -124,9 +122,8 @@ void split_pmd_thp(void) } - thp_size = check_huge(one_page); - if (thp_size) { - printf("Still %ld kB AnonHugePages not split\n", thp_size); + if (check_huge_anon(one_page, 0, pmd_pagesize)) { + printf("Still AnonHugePages not split\n"); exit(EXIT_FAILURE); } @@ -172,8 +169,7 @@ void split_pte_mapped_thp(void) for (i = 0; i < len; i++) one_page[i] = (char)i; - thp_size = check_huge(one_page); - if (!thp_size) { + if (!check_huge_anon(one_page, 1, pmd_pagesize)) { printf("No THP is allocated\n"); exit(EXIT_FAILURE); } diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c index b58ab11a7a30..9dae51b8219f 100644 --- a/tools/testing/selftests/vm/vm_util.c +++ b/tools/testing/selftests/vm/vm_util.c @@ -42,9 +42,9 @@ void clear_softdirty(void) ksft_exit_fail_msg("writing clear_refs failed\n"); } -static bool check_for_pattern(FILE *fp, const char *pattern, char *buf) +bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len) { - while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) { + while (fgets(buf, len, fp)) { if (!strncmp(buf, pattern, strlen(pattern))) return true; } @@ -72,9 +72,10 @@ uint64_t read_pmd_pagesize(void) return strtoul(buf, NULL, 10); } -uint64_t check_huge(void *addr) +bool __check_huge(void *addr, char *pattern, int nr_hpages, + uint64_t hpage_size) { - uint64_t thp = 0; + uint64_t thp = -1; int ret; FILE *fp; char buffer[MAX_LINE_LENGTH]; @@ -89,20 +90,27 @@ uint64_t check_huge(void *addr) if (!fp) ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH); - if (!check_for_pattern(fp, addr_pattern, buffer)) + if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer))) goto err_out; /* - * Fetch the AnonHugePages: in the same block and check the number of + * Fetch the pattern in the same block and check the number of * hugepages. */ - if (!check_for_pattern(fp, "AnonHugePages:", buffer)) + if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer))) goto err_out; - if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) + snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern); + + if (sscanf(buffer, addr_pattern, &thp) != 1) ksft_exit_fail_msg("Reading smap error\n"); err_out: fclose(fp); - return thp; + return thp == (nr_hpages * (hpage_size >> 10)); +} + +bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size) +{ + return __check_huge(addr, "AnonHugePages: ", nr_hpages, hpage_size); } diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h index 2e512bd57ae1..8434ea0c95cd 100644 --- a/tools/testing/selftests/vm/vm_util.h +++ b/tools/testing/selftests/vm/vm_util.h @@ -5,5 +5,6 @@ uint64_t pagemap_get_entry(int fd, char *start); bool pagemap_is_softdirty(int fd, char *start); void clear_softdirty(void); +bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len); uint64_t read_pmd_pagesize(void); -uint64_t check_huge(void *addr); +bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size); -- cgit v1.2.3 From 8e638707a3f1a82dccbdc9285980329644946d4f Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Thu, 22 Sep 2022 15:40:42 -0700 Subject: selftests/vm: modularize thp collapse memory operations Modularize operations to setup, cleanup, fault, and check for huge pages, for a given memory type. This allows reusing existing tests with additional memory types by defining new memory operations. Following patches will add file and shmem memory types. Link: https://lkml.kernel.org/r/20220907144521.3115321-7-zokeefe@google.com Link: https://lkml.kernel.org/r/20220922224046.1143204-7-zokeefe@google.com Signed-off-by: Zach O'Keefe Cc: Axel Rasmussen Cc: Chris Kennelly Cc: David Hildenbrand Cc: David Rientjes Cc: Hugh Dickins Cc: James Houghton Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Minchan Kim Cc: Pasha Tatashin Cc: Peter Xu Cc: Rongwei Wang Cc: SeongJae Park Cc: Song Liu Cc: Vlastimil Babka Cc: Yang Shi Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/khugepaged.c | 373 ++++++++++++++++++-------------- 1 file changed, 207 insertions(+), 166 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c index 235a64b4458c..06ea6f18980e 100644 --- a/tools/testing/selftests/vm/khugepaged.c +++ b/tools/testing/selftests/vm/khugepaged.c @@ -29,8 +29,16 @@ static int hpage_pmd_nr; #define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/" #define PID_SMAPS "/proc/self/smaps" +struct mem_ops { + void *(*setup_area)(int nr_hpages); + void (*cleanup_area)(void *p, unsigned long size); + void (*fault)(void *p, unsigned long start, unsigned long end); + bool (*check_huge)(void *addr, int nr_hpages); +}; + struct collapse_context { - void (*collapse)(const char *msg, char *p, int nr_hpages, bool expect); + void (*collapse)(const char *msg, char *p, int nr_hpages, + struct mem_ops *ops, bool expect); bool enforce_pte_scan_limits; }; @@ -354,11 +362,6 @@ static void save_settings(void) signal(SIGQUIT, restore_settings); } -static bool check_huge(void *addr, int nr_hpages) -{ - return check_huge_anon(addr, nr_hpages, hpage_pmd_size); -} - #define MAX_LINE_LENGTH 500 static bool check_swap(void *addr, unsigned long size) { @@ -452,18 +455,33 @@ retry: * Returns pmd-mapped hugepage in VMA marked VM_HUGEPAGE, filled with * validate_memory()'able contents. */ -static void *alloc_hpage(void) +static void *alloc_hpage(struct mem_ops *ops) { - void *p; + void *p = ops->setup_area(1); - p = alloc_mapping(1); + ops->fault(p, 0, hpage_pmd_size); + + /* + * VMA should be neither VM_HUGEPAGE nor VM_NOHUGEPAGE. + * The latter is ineligible for collapse by MADV_COLLAPSE + * while the former might cause MADV_COLLAPSE to race with + * khugepaged on low-load system (like a test machine), which + * would cause MADV_COLLAPSE to fail with EAGAIN. + */ printf("Allocate huge page..."); - madvise(p, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p, 1)) - success("OK"); - else - fail("Fail"); + if (madvise_collapse_retry(p, hpage_pmd_size)) { + perror("madvise(MADV_COLLAPSE)"); + exit(EXIT_FAILURE); + } + if (!ops->check_huge(p, 1)) { + perror("madvise(MADV_COLLAPSE)"); + exit(EXIT_FAILURE); + } + if (madvise(p, hpage_pmd_size, MADV_HUGEPAGE)) { + perror("madvise(MADV_HUGEPAGE)"); + exit(EXIT_FAILURE); + } + success("OK"); return p; } @@ -480,18 +498,40 @@ static void validate_memory(int *p, unsigned long start, unsigned long end) } } -static void madvise_collapse(const char *msg, char *p, int nr_hpages, - bool expect) +static void *anon_setup_area(int nr_hpages) +{ + return alloc_mapping(nr_hpages); +} + +static void anon_cleanup_area(void *p, unsigned long size) +{ + munmap(p, size); +} + +static void anon_fault(void *p, unsigned long start, unsigned long end) +{ + fill_memory(p, start, end); +} + +static bool anon_check_huge(void *addr, int nr_hpages) +{ + return check_huge_anon(addr, nr_hpages, hpage_pmd_size); +} + +static struct mem_ops anon_ops = { + .setup_area = &anon_setup_area, + .cleanup_area = &anon_cleanup_area, + .fault = &anon_fault, + .check_huge = &anon_check_huge, +}; + +static void __madvise_collapse(const char *msg, char *p, int nr_hpages, + struct mem_ops *ops, bool expect) { int ret; struct settings settings = *current_settings(); printf("%s...", msg); - /* Sanity check */ - if (!check_huge(p, 0)) { - printf("Unexpected huge page\n"); - exit(EXIT_FAILURE); - } /* * Prevent khugepaged interference and tests that MADV_COLLAPSE @@ -505,7 +545,7 @@ static void madvise_collapse(const char *msg, char *p, int nr_hpages, ret = madvise_collapse_retry(p, nr_hpages * hpage_pmd_size); if (((bool)ret) == expect) fail("Fail: Bad return value"); - else if (check_huge(p, nr_hpages) != expect) + else if (!ops->check_huge(p, expect ? nr_hpages : 0)) fail("Fail: check_huge()"); else success("OK"); @@ -513,14 +553,26 @@ static void madvise_collapse(const char *msg, char *p, int nr_hpages, pop_settings(); } +static void madvise_collapse(const char *msg, char *p, int nr_hpages, + struct mem_ops *ops, bool expect) +{ + /* Sanity check */ + if (!ops->check_huge(p, 0)) { + printf("Unexpected huge page\n"); + exit(EXIT_FAILURE); + } + __madvise_collapse(msg, p, nr_hpages, ops, expect); +} + #define TICK 500000 -static bool wait_for_scan(const char *msg, char *p, int nr_hpages) +static bool wait_for_scan(const char *msg, char *p, int nr_hpages, + struct mem_ops *ops) { int full_scans; int timeout = 6; /* 3 seconds */ /* Sanity check */ - if (!check_huge(p, 0)) { + if (!ops->check_huge(p, 0)) { printf("Unexpected huge page\n"); exit(EXIT_FAILURE); } @@ -532,7 +584,7 @@ static bool wait_for_scan(const char *msg, char *p, int nr_hpages) printf("%s...", msg); while (timeout--) { - if (check_huge(p, nr_hpages)) + if (ops->check_huge(p, nr_hpages)) break; if (read_num("khugepaged/full_scans") >= full_scans) break; @@ -546,19 +598,20 @@ static bool wait_for_scan(const char *msg, char *p, int nr_hpages) } static void khugepaged_collapse(const char *msg, char *p, int nr_hpages, - bool expect) + struct mem_ops *ops, bool expect) { - if (wait_for_scan(msg, p, nr_hpages)) { + if (wait_for_scan(msg, p, nr_hpages, ops)) { if (expect) fail("Timeout"); else success("OK"); return; - } else if (check_huge(p, nr_hpages) == expect) { + } + + if (ops->check_huge(p, expect ? nr_hpages : 0)) success("OK"); - } else { + else fail("Fail"); - } } static void alloc_at_fault(void) @@ -572,7 +625,7 @@ static void alloc_at_fault(void) p = alloc_mapping(1); *p = 1; printf("Allocate huge page on fault..."); - if (check_huge(p, 1)) + if (check_huge_anon(p, 1, hpage_pmd_size)) success("OK"); else fail("Fail"); @@ -581,49 +634,48 @@ static void alloc_at_fault(void) madvise(p, page_size, MADV_DONTNEED); printf("Split huge PMD on MADV_DONTNEED..."); - if (check_huge(p, 0)) + if (check_huge_anon(p, 0, hpage_pmd_size)) success("OK"); else fail("Fail"); munmap(p, hpage_pmd_size); } -static void collapse_full(struct collapse_context *c) +static void collapse_full(struct collapse_context *c, struct mem_ops *ops) { void *p; int nr_hpages = 4; unsigned long size = nr_hpages * hpage_pmd_size; - p = alloc_mapping(nr_hpages); - fill_memory(p, 0, size); + p = ops->setup_area(nr_hpages); + ops->fault(p, 0, size); c->collapse("Collapse multiple fully populated PTE table", p, nr_hpages, - true); + ops, true); validate_memory(p, 0, size); - munmap(p, size); + ops->cleanup_area(p, size); } -static void collapse_empty(struct collapse_context *c) +static void collapse_empty(struct collapse_context *c, struct mem_ops *ops) { void *p; - p = alloc_mapping(1); - c->collapse("Do not collapse empty PTE table", p, 1, false); - munmap(p, hpage_pmd_size); + p = ops->setup_area(1); + c->collapse("Do not collapse empty PTE table", p, 1, ops, false); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_single_pte_entry(struct collapse_context *c) +static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops *ops) { void *p; - p = alloc_mapping(1); - fill_memory(p, 0, page_size); + p = ops->setup_area(1); + ops->fault(p, 0, page_size); c->collapse("Collapse PTE table with single PTE entry present", p, - 1, true); - validate_memory(p, 0, page_size); - munmap(p, hpage_pmd_size); + 1, ops, true); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_max_ptes_none(struct collapse_context *c) +static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops) { int max_ptes_none = hpage_pmd_nr / 2; struct settings settings = *current_settings(); @@ -632,30 +684,30 @@ static void collapse_max_ptes_none(struct collapse_context *c) settings.khugepaged.max_ptes_none = max_ptes_none; push_settings(&settings); - p = alloc_mapping(1); + p = ops->setup_area(1); - fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); + ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1, - !c->enforce_pte_scan_limits); + ops, !c->enforce_pte_scan_limits); validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); if (c->enforce_pte_scan_limits) { - fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); - c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, + ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); + c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, ops, true); validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); } - - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); pop_settings(); } -static void collapse_swapin_single_pte(struct collapse_context *c) +static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops) { void *p; - p = alloc_mapping(1); - fill_memory(p, 0, hpage_pmd_size); + + p = ops->setup_area(1); + ops->fault(p, 0, hpage_pmd_size); printf("Swapout one page..."); if (madvise(p, page_size, MADV_PAGEOUT)) { @@ -669,20 +721,21 @@ static void collapse_swapin_single_pte(struct collapse_context *c) goto out; } - c->collapse("Collapse with swapping in single PTE entry", p, 1, true); + c->collapse("Collapse with swapping in single PTE entry", p, 1, ops, + true); validate_memory(p, 0, hpage_pmd_size); out: - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_max_ptes_swap(struct collapse_context *c) +static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops) { int max_ptes_swap = read_num("khugepaged/max_ptes_swap"); void *p; - p = alloc_mapping(1); + p = ops->setup_area(1); + ops->fault(p, 0, hpage_pmd_size); - fill_memory(p, 0, hpage_pmd_size); printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr); if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) { perror("madvise(MADV_PAGEOUT)"); @@ -695,12 +748,12 @@ static void collapse_max_ptes_swap(struct collapse_context *c) goto out; } - c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1, + c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1, ops, !c->enforce_pte_scan_limits); validate_memory(p, 0, hpage_pmd_size); if (c->enforce_pte_scan_limits) { - fill_memory(p, 0, hpage_pmd_size); + ops->fault(p, 0, hpage_pmd_size); printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr); if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) { @@ -715,63 +768,65 @@ static void collapse_max_ptes_swap(struct collapse_context *c) } c->collapse("Collapse with max_ptes_swap pages swapped out", p, - 1, true); + 1, ops, true); validate_memory(p, 0, hpage_pmd_size); } out: - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_single_pte_entry_compound(struct collapse_context *c) +static void collapse_single_pte_entry_compound(struct collapse_context *c, struct mem_ops *ops) { void *p; - p = alloc_hpage(); + p = alloc_hpage(ops); + madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); printf("Split huge page leaving single PTE mapping compound page..."); madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED); - if (check_huge(p, 0)) + if (ops->check_huge(p, 0)) success("OK"); else fail("Fail"); c->collapse("Collapse PTE table with single PTE mapping compound page", - p, 1, true); + p, 1, ops, true); validate_memory(p, 0, page_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_full_of_compound(struct collapse_context *c) +static void collapse_full_of_compound(struct collapse_context *c, struct mem_ops *ops) { void *p; - p = alloc_hpage(); + p = alloc_hpage(ops); printf("Split huge page leaving single PTE page table full of compound pages..."); madvise(p, page_size, MADV_NOHUGEPAGE); madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - if (check_huge(p, 0)) + if (ops->check_huge(p, 0)) success("OK"); else fail("Fail"); - c->collapse("Collapse PTE table full of compound pages", p, 1, true); + c->collapse("Collapse PTE table full of compound pages", p, 1, ops, + true); validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_compound_extreme(struct collapse_context *c) +static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops *ops) { void *p; int i; - p = alloc_mapping(1); + p = ops->setup_area(1); for (i = 0; i < hpage_pmd_nr; i++) { printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...", i + 1, hpage_pmd_nr); madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE); - fill_memory(BASE_ADDR, 0, hpage_pmd_size); - if (!check_huge(BASE_ADDR, 1)) { + ops->fault(BASE_ADDR, 0, hpage_pmd_size); + if (!ops->check_huge(BASE_ADDR, 1)) { printf("Failed to allocate huge page\n"); exit(EXIT_FAILURE); } @@ -798,30 +853,30 @@ static void collapse_compound_extreme(struct collapse_context *c) } } - munmap(BASE_ADDR, hpage_pmd_size); - fill_memory(p, 0, hpage_pmd_size); - if (check_huge(p, 0)) + ops->cleanup_area(BASE_ADDR, hpage_pmd_size); + ops->fault(p, 0, hpage_pmd_size); + if (!ops->check_huge(p, 1)) success("OK"); else fail("Fail"); c->collapse("Collapse PTE table full of different compound pages", p, 1, - true); + ops, true); validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_fork(struct collapse_context *c) +static void collapse_fork(struct collapse_context *c, struct mem_ops *ops) { int wstatus; void *p; - p = alloc_mapping(1); + p = ops->setup_area(1); printf("Allocate small page..."); - fill_memory(p, 0, page_size); - if (check_huge(p, 0)) + ops->fault(p, 0, page_size); + if (ops->check_huge(p, 0)) success("OK"); else fail("Fail"); @@ -832,17 +887,17 @@ static void collapse_fork(struct collapse_context *c) skip_settings_restore = true; exit_status = 0; - if (check_huge(p, 0)) + if (ops->check_huge(p, 0)) success("OK"); else fail("Fail"); - fill_memory(p, page_size, 2 * page_size); + ops->fault(p, page_size, 2 * page_size); c->collapse("Collapse PTE table with single page shared with parent process", - p, 1, true); + p, 1, ops, true); validate_memory(p, 0, page_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); exit(exit_status); } @@ -850,27 +905,27 @@ static void collapse_fork(struct collapse_context *c) exit_status += WEXITSTATUS(wstatus); printf("Check if parent still has small page..."); - if (check_huge(p, 0)) + if (ops->check_huge(p, 0)) success("OK"); else fail("Fail"); validate_memory(p, 0, page_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_fork_compound(struct collapse_context *c) +static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *ops) { int wstatus; void *p; - p = alloc_hpage(); + p = alloc_hpage(ops); printf("Share huge page over fork()..."); if (!fork()) { /* Do not touch settings on child exit */ skip_settings_restore = true; exit_status = 0; - if (check_huge(p, 1)) + if (ops->check_huge(p, 1)) success("OK"); else fail("Fail"); @@ -878,20 +933,20 @@ static void collapse_fork_compound(struct collapse_context *c) printf("Split huge page PMD in child process..."); madvise(p, page_size, MADV_NOHUGEPAGE); madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - if (check_huge(p, 0)) + if (ops->check_huge(p, 0)) success("OK"); else fail("Fail"); - fill_memory(p, 0, page_size); + ops->fault(p, 0, page_size); write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); c->collapse("Collapse PTE table full of compound pages in child", - p, 1, true); + p, 1, ops, true); write_num("khugepaged/max_ptes_shared", current_settings()->khugepaged.max_ptes_shared); validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); exit(exit_status); } @@ -899,59 +954,59 @@ static void collapse_fork_compound(struct collapse_context *c) exit_status += WEXITSTATUS(wstatus); printf("Check if parent still has huge page..."); - if (check_huge(p, 1)) + if (ops->check_huge(p, 1)) success("OK"); else fail("Fail"); validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } -static void collapse_max_ptes_shared(struct collapse_context *c) +static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops) { int max_ptes_shared = read_num("khugepaged/max_ptes_shared"); int wstatus; void *p; - p = alloc_hpage(); + p = alloc_hpage(ops); printf("Share huge page over fork()..."); if (!fork()) { /* Do not touch settings on child exit */ skip_settings_restore = true; exit_status = 0; - if (check_huge(p, 1)) + if (ops->check_huge(p, 1)) success("OK"); else fail("Fail"); printf("Trigger CoW on page %d of %d...", hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr); - fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size); - if (check_huge(p, 0)) + ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size); + if (ops->check_huge(p, 0)) success("OK"); else fail("Fail"); c->collapse("Maybe collapse with max_ptes_shared exceeded", p, - 1, !c->enforce_pte_scan_limits); + 1, ops, !c->enforce_pte_scan_limits); if (c->enforce_pte_scan_limits) { printf("Trigger CoW on page %d of %d...", hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr); - fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * + ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size); - if (check_huge(p, 0)) + if (ops->check_huge(p, 0)) success("OK"); else fail("Fail"); c->collapse("Collapse with max_ptes_shared PTEs shared", - p, 1, true); + p, 1, ops, true); } validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); exit(exit_status); } @@ -959,42 +1014,28 @@ static void collapse_max_ptes_shared(struct collapse_context *c) exit_status += WEXITSTATUS(wstatus); printf("Check if parent still has huge page..."); - if (check_huge(p, 1)) + if (ops->check_huge(p, 1)) success("OK"); else fail("Fail"); validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } -static void madvise_collapse_existing_thps(void) +static void madvise_collapse_existing_thps(struct collapse_context *c, + struct mem_ops *ops) { void *p; - int err; - p = alloc_mapping(1); - fill_memory(p, 0, hpage_pmd_size); + p = ops->setup_area(1); + ops->fault(p, 0, hpage_pmd_size); + c->collapse("Collapse fully populated PTE table...", p, 1, ops, true); + validate_memory(p, 0, hpage_pmd_size); - printf("Collapse fully populated PTE table..."); - /* - * Note that we don't set MADV_HUGEPAGE here, which - * also tests that VM_HUGEPAGE isn't required for - * MADV_COLLAPSE in "madvise" mode. - */ - err = madvise(p, hpage_pmd_size, MADV_COLLAPSE); - if (err == 0 && check_huge(p, 1)) { - success("OK"); - printf("Re-collapse PMD-mapped hugepage"); - err = madvise(p, hpage_pmd_size, MADV_COLLAPSE); - if (err == 0 && check_huge(p, 1)) - success("OK"); - else - fail("Fail"); - } else { - fail("Fail"); - } + /* c->collapse() will find a hugepage and complain - call directly. */ + __madvise_collapse("Re-collapse PMD-mapped hugepage", p, 1, ops, true); validate_memory(p, 0, hpage_pmd_size); - munmap(p, hpage_pmd_size); + ops->cleanup_area(p, hpage_pmd_size); } int main(int argc, const char **argv) @@ -1034,37 +1075,37 @@ int main(int argc, const char **argv) c.collapse = &khugepaged_collapse; c.enforce_pte_scan_limits = true; - collapse_full(&c); - collapse_empty(&c); - collapse_single_pte_entry(&c); - collapse_max_ptes_none(&c); - collapse_swapin_single_pte(&c); - collapse_max_ptes_swap(&c); - collapse_single_pte_entry_compound(&c); - collapse_full_of_compound(&c); - collapse_compound_extreme(&c); - collapse_fork(&c); - collapse_fork_compound(&c); - collapse_max_ptes_shared(&c); + collapse_full(&c, &anon_ops); + collapse_empty(&c, &anon_ops); + collapse_single_pte_entry(&c, &anon_ops); + collapse_max_ptes_none(&c, &anon_ops); + collapse_swapin_single_pte(&c, &anon_ops); + collapse_max_ptes_swap(&c, &anon_ops); + collapse_single_pte_entry_compound(&c, &anon_ops); + collapse_full_of_compound(&c, &anon_ops); + collapse_compound_extreme(&c, &anon_ops); + collapse_fork(&c, &anon_ops); + collapse_fork_compound(&c, &anon_ops); + collapse_max_ptes_shared(&c, &anon_ops); } if (!strcmp(tests, "madvise") || !strcmp(tests, "all")) { printf("\n*** Testing context: madvise ***\n"); c.collapse = &madvise_collapse; c.enforce_pte_scan_limits = false; - collapse_full(&c); - collapse_empty(&c); - collapse_single_pte_entry(&c); - collapse_max_ptes_none(&c); - collapse_swapin_single_pte(&c); - collapse_max_ptes_swap(&c); - collapse_single_pte_entry_compound(&c); - collapse_full_of_compound(&c); - collapse_compound_extreme(&c); - collapse_fork(&c); - collapse_fork_compound(&c); - collapse_max_ptes_shared(&c); - madvise_collapse_existing_thps(); + collapse_full(&c, &anon_ops); + collapse_empty(&c, &anon_ops); + collapse_single_pte_entry(&c, &anon_ops); + collapse_max_ptes_none(&c, &anon_ops); + collapse_swapin_single_pte(&c, &anon_ops); + collapse_max_ptes_swap(&c, &anon_ops); + collapse_single_pte_entry_compound(&c, &anon_ops); + collapse_full_of_compound(&c, &anon_ops); + collapse_compound_extreme(&c, &anon_ops); + collapse_fork(&c, &anon_ops); + collapse_fork_compound(&c, &anon_ops); + collapse_max_ptes_shared(&c, &anon_ops); + madvise_collapse_existing_thps(&c, &anon_ops); } restore_settings(0); -- cgit v1.2.3 From 1b03d0d558a281f12f68e5917dfa781c3b94e074 Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Thu, 22 Sep 2022 15:40:43 -0700 Subject: selftests/vm: add thp collapse file and tmpfs testing Add memory operations for file-backed and tmpfs memory. Call existing tests with these new memory operations to test collapse functionality of khugepaged and MADV_COLLAPSE on file-backed and tmpfs memory. Not all tests are reusable; for example, collapse_swapin_single_pte() which checks swap usage. Refactor test arguments. Usage is now: Usage: ./khugepaged [dir] : : : [all|khugepaged|madvise] : [all|anon|file] "file,all" mem_type requires [dir] argument "file,all" mem_type requires kernel built with CONFIG_READ_ONLY_THP_FOR_FS=y if [dir] is a (sub)directory of a tmpfs mount, tmpfs must be mounted with huge=madvise option for khugepaged tests to work Refactor calling tests to make it clear what collapse context / memory operations they support, but only invoke tests requested by user. Also log what test is being ran, and with what context / memory, to make test logs more human readable. A new test file is created and deleted for every test to ensure no pages remain in the page cache between tests (tests also may attempt to collapse different amount of memory). For file-backed memory where the file is stored on a block device, disable /sys/block//queue/read_ahead_kb so that pages don't find their way into the page cache without the tests faulting them in. Add file and shmem wrappers to vm_utils check for file and shmem hugepages in smaps. [zokeefe@google.com: fix "add thp collapse file and tmpfs testing" for tmpfs] Link: https://lkml.kernel.org/r/20220913212517.3163701-1-zokeefe@google.com Link: https://lkml.kernel.org/r/20220907144521.3115321-8-zokeefe@google.com Link: https://lkml.kernel.org/r/20220922224046.1143204-8-zokeefe@google.com Signed-off-by: Zach O'Keefe Cc: Axel Rasmussen Cc: Chris Kennelly Cc: David Hildenbrand Cc: David Rientjes Cc: Hugh Dickins Cc: James Houghton Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Minchan Kim Cc: Pasha Tatashin Cc: Peter Xu Cc: Rongwei Wang Cc: SeongJae Park Cc: Song Liu Cc: Vlastimil Babka Cc: Yang Shi Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/khugepaged.c | 475 ++++++++++++++++++++++++++++---- tools/testing/selftests/vm/vm_util.c | 10 + tools/testing/selftests/vm/vm_util.h | 2 + 3 files changed, 431 insertions(+), 56 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c index 06ea6f18980e..08de6141c2af 100644 --- a/tools/testing/selftests/vm/khugepaged.c +++ b/tools/testing/selftests/vm/khugepaged.c @@ -1,7 +1,9 @@ #define _GNU_SOURCE +#include #include #include #include +#include #include #include #include @@ -11,12 +13,21 @@ #include #include +#include +#include +#include +#include + +#include "linux/magic.h" #include "vm_util.h" #ifndef MADV_PAGEOUT #define MADV_PAGEOUT 21 #endif +#ifndef MADV_POPULATE_READ +#define MADV_POPULATE_READ 22 +#endif #ifndef MADV_COLLAPSE #define MADV_COLLAPSE 25 #endif @@ -28,20 +39,47 @@ static int hpage_pmd_nr; #define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/" #define PID_SMAPS "/proc/self/smaps" +#define TEST_FILE "collapse_test_file" + +#define MAX_LINE_LENGTH 500 + +enum vma_type { + VMA_ANON, + VMA_FILE, + VMA_SHMEM, +}; struct mem_ops { void *(*setup_area)(int nr_hpages); void (*cleanup_area)(void *p, unsigned long size); void (*fault)(void *p, unsigned long start, unsigned long end); bool (*check_huge)(void *addr, int nr_hpages); + const char *name; }; +static struct mem_ops *file_ops; +static struct mem_ops *anon_ops; + struct collapse_context { void (*collapse)(const char *msg, char *p, int nr_hpages, struct mem_ops *ops, bool expect); bool enforce_pte_scan_limits; + const char *name; +}; + +static struct collapse_context *khugepaged_context; +static struct collapse_context *madvise_context; + +struct file_info { + const char *dir; + char path[PATH_MAX]; + enum vma_type type; + int fd; + char dev_queue_read_ahead_path[PATH_MAX]; }; +static struct file_info finfo; + enum thp_enabled { THP_ALWAYS, THP_MADVISE, @@ -107,6 +145,7 @@ struct settings { enum shmem_enabled shmem_enabled; bool use_zero_page; struct khugepaged_settings khugepaged; + unsigned long read_ahead_kb; }; static struct settings saved_settings; @@ -125,6 +164,11 @@ static void fail(const char *msg) exit_status++; } +static void skip(const char *msg) +{ + printf(" \e[33m%s\e[0m\n", msg); +} + static int read_file(const char *path, char *buf, size_t buflen) { int fd; @@ -152,13 +196,19 @@ static int write_file(const char *path, const char *buf, size_t buflen) ssize_t numwritten; fd = open(path, O_WRONLY); - if (fd == -1) + if (fd == -1) { + printf("open(%s)\n", path); + exit(EXIT_FAILURE); return 0; + } numwritten = write(fd, buf, buflen - 1); close(fd); - if (numwritten < 1) + if (numwritten < 1) { + printf("write(%s)\n", buf); + exit(EXIT_FAILURE); return 0; + } return (unsigned int) numwritten; } @@ -225,20 +275,11 @@ static void write_string(const char *name, const char *val) } } -static const unsigned long read_num(const char *name) +static const unsigned long _read_num(const char *path) { - char path[PATH_MAX]; char buf[21]; - int ret; - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - - ret = read_file(path, buf, sizeof(buf)); - if (ret < 0) { + if (read_file(path, buf, sizeof(buf)) < 0) { perror("read_file(read_num)"); exit(EXIT_FAILURE); } @@ -246,10 +287,9 @@ static const unsigned long read_num(const char *name) return strtoul(buf, NULL, 10); } -static void write_num(const char *name, unsigned long num) +static const unsigned long read_num(const char *name) { char path[PATH_MAX]; - char buf[21]; int ret; ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); @@ -257,6 +297,12 @@ static void write_num(const char *name, unsigned long num) printf("%s: Pathname is too long\n", __func__); exit(EXIT_FAILURE); } + return _read_num(path); +} + +static void _write_num(const char *path, unsigned long num) +{ + char buf[21]; sprintf(buf, "%ld", num); if (!write_file(path, buf, strlen(buf) + 1)) { @@ -265,6 +311,19 @@ static void write_num(const char *name, unsigned long num) } } +static void write_num(const char *name, unsigned long num) +{ + char path[PATH_MAX]; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + _write_num(path, num); +} + static void write_settings(struct settings *settings) { struct khugepaged_settings *khugepaged = &settings->khugepaged; @@ -284,6 +343,10 @@ static void write_settings(struct settings *settings) write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap); write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared); write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); + + if (file_ops && finfo.type == VMA_FILE) + _write_num(finfo.dev_queue_read_ahead_path, + settings->read_ahead_kb); } #define MAX_SETTINGS_DEPTH 4 @@ -354,6 +417,10 @@ static void save_settings(void) .max_ptes_shared = read_num("khugepaged/max_ptes_shared"), .pages_to_scan = read_num("khugepaged/pages_to_scan"), }; + if (file_ops && finfo.type == VMA_FILE) + saved_settings.read_ahead_kb = + _read_num(finfo.dev_queue_read_ahead_path); + success("OK"); signal(SIGTERM, restore_settings); @@ -362,7 +429,90 @@ static void save_settings(void) signal(SIGQUIT, restore_settings); } -#define MAX_LINE_LENGTH 500 +static void get_finfo(const char *dir) +{ + struct stat path_stat; + struct statfs fs; + char buf[1 << 10]; + char path[PATH_MAX]; + char *str, *end; + + finfo.dir = dir; + stat(finfo.dir, &path_stat); + if (!S_ISDIR(path_stat.st_mode)) { + printf("%s: Not a directory (%s)\n", __func__, finfo.dir); + exit(EXIT_FAILURE); + } + if (snprintf(finfo.path, sizeof(finfo.path), "%s/" TEST_FILE, + finfo.dir) >= sizeof(finfo.path)) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + if (statfs(finfo.dir, &fs)) { + perror("statfs()"); + exit(EXIT_FAILURE); + } + finfo.type = fs.f_type == TMPFS_MAGIC ? VMA_SHMEM : VMA_FILE; + if (finfo.type == VMA_SHMEM) + return; + + /* Find owning device's queue/read_ahead_kb control */ + if (snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/uevent", + major(path_stat.st_dev), minor(path_stat.st_dev)) + >= sizeof(path)) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + if (read_file(path, buf, sizeof(buf)) < 0) { + perror("read_file(read_num)"); + exit(EXIT_FAILURE); + } + if (strstr(buf, "DEVTYPE=disk")) { + /* Found it */ + if (snprintf(finfo.dev_queue_read_ahead_path, + sizeof(finfo.dev_queue_read_ahead_path), + "/sys/dev/block/%d:%d/queue/read_ahead_kb", + major(path_stat.st_dev), minor(path_stat.st_dev)) + >= sizeof(finfo.dev_queue_read_ahead_path)) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + return; + } + if (!strstr(buf, "DEVTYPE=partition")) { + printf("%s: Unknown device type: %s\n", __func__, path); + exit(EXIT_FAILURE); + } + /* + * Partition of block device - need to find actual device. + * Using naming convention that devnameN is partition of + * device devname. + */ + str = strstr(buf, "DEVNAME="); + if (!str) { + printf("%s: Could not read: %s", __func__, path); + exit(EXIT_FAILURE); + } + str += 8; + end = str; + while (*end) { + if (isdigit(*end)) { + *end = '\0'; + if (snprintf(finfo.dev_queue_read_ahead_path, + sizeof(finfo.dev_queue_read_ahead_path), + "/sys/block/%s/queue/read_ahead_kb", + str) >= sizeof(finfo.dev_queue_read_ahead_path)) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + return; + } + ++end; + } + printf("%s: Could not read: %s\n", __func__, path); + exit(EXIT_FAILURE); +} + static bool check_swap(void *addr, unsigned long size) { bool swap = false; @@ -518,11 +668,91 @@ static bool anon_check_huge(void *addr, int nr_hpages) return check_huge_anon(addr, nr_hpages, hpage_pmd_size); } -static struct mem_ops anon_ops = { +static void *file_setup_area(int nr_hpages) +{ + int fd; + void *p; + unsigned long size; + + unlink(finfo.path); /* Cleanup from previous failed tests */ + printf("Creating %s for collapse%s...", finfo.path, + finfo.type == VMA_SHMEM ? " (tmpfs)" : ""); + fd = open(finfo.path, O_DSYNC | O_CREAT | O_RDWR | O_TRUNC | O_EXCL, + 777); + if (fd < 0) { + perror("open()"); + exit(EXIT_FAILURE); + } + + size = nr_hpages * hpage_pmd_size; + p = alloc_mapping(nr_hpages); + fill_memory(p, 0, size); + write(fd, p, size); + close(fd); + munmap(p, size); + success("OK"); + + printf("Opening %s read only for collapse...", finfo.path); + finfo.fd = open(finfo.path, O_RDONLY, 777); + if (finfo.fd < 0) { + perror("open()"); + exit(EXIT_FAILURE); + } + p = mmap(BASE_ADDR, size, PROT_READ | PROT_EXEC, + MAP_PRIVATE, finfo.fd, 0); + if (p == MAP_FAILED || p != BASE_ADDR) { + perror("mmap()"); + exit(EXIT_FAILURE); + } + + /* Drop page cache */ + write_file("/proc/sys/vm/drop_caches", "3", 2); + success("OK"); + return p; +} + +static void file_cleanup_area(void *p, unsigned long size) +{ + munmap(p, size); + close(finfo.fd); + unlink(finfo.path); +} + +static void file_fault(void *p, unsigned long start, unsigned long end) +{ + if (madvise(((char *)p) + start, end - start, MADV_POPULATE_READ)) { + perror("madvise(MADV_POPULATE_READ"); + exit(EXIT_FAILURE); + } +} + +static bool file_check_huge(void *addr, int nr_hpages) +{ + switch (finfo.type) { + case VMA_FILE: + return check_huge_file(addr, nr_hpages, hpage_pmd_size); + case VMA_SHMEM: + return check_huge_shmem(addr, nr_hpages, hpage_pmd_size); + default: + exit(EXIT_FAILURE); + return false; + } +} + +static struct mem_ops __anon_ops = { .setup_area = &anon_setup_area, .cleanup_area = &anon_cleanup_area, .fault = &anon_fault, .check_huge = &anon_check_huge, + .name = "anon", +}; + +static struct mem_ops __file_ops = { + .setup_area = &file_setup_area, + .cleanup_area = &file_cleanup_area, + .fault = &file_fault, + .check_huge = &file_check_huge, + .name = "file", }; static void __madvise_collapse(const char *msg, char *p, int nr_hpages, @@ -538,6 +768,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages, * ignores /sys/kernel/mm/transparent_hugepage/enabled */ settings.thp_enabled = THP_NEVER; + settings.shmem_enabled = SHMEM_NEVER; push_settings(&settings); /* Clear VM_NOHUGEPAGE */ @@ -608,12 +839,37 @@ static void khugepaged_collapse(const char *msg, char *p, int nr_hpages, return; } + /* + * For file and shmem memory, khugepaged only retracts pte entries after + * putting the new hugepage in the page cache. The hugepage must be + * subsequently refaulted to install the pmd mapping for the mm. + */ + if (ops != &__anon_ops) + ops->fault(p, 0, nr_hpages * hpage_pmd_size); + if (ops->check_huge(p, expect ? nr_hpages : 0)) success("OK"); else fail("Fail"); } +static struct collapse_context __khugepaged_context = { + .collapse = &khugepaged_collapse, + .enforce_pte_scan_limits = true, + .name = "khugepaged", +}; + +static struct collapse_context __madvise_context = { + .collapse = &madvise_collapse, + .enforce_pte_scan_limits = false, + .name = "madvise", +}; + +static bool is_tmpfs(struct mem_ops *ops) +{ + return ops == &__file_ops && finfo.type == VMA_SHMEM; +} + static void alloc_at_fault(void) { struct settings settings = *current_settings(); @@ -686,6 +942,13 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o p = ops->setup_area(1); + if (is_tmpfs(ops)) { + /* shmem pages always in the page cache */ + printf("tmpfs..."); + skip("Skip"); + goto skip; + } + ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1, ops, !c->enforce_pte_scan_limits); @@ -698,6 +961,7 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); } +skip: ops->cleanup_area(p, hpage_pmd_size); pop_settings(); } @@ -781,6 +1045,13 @@ static void collapse_single_pte_entry_compound(struct collapse_context *c, struc p = alloc_hpage(ops); + if (is_tmpfs(ops)) { + /* MADV_DONTNEED won't evict tmpfs pages */ + printf("tmpfs..."); + skip("Skip"); + goto skip; + } + madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); printf("Split huge page leaving single PTE mapping compound page..."); madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED); @@ -792,6 +1063,7 @@ static void collapse_single_pte_entry_compound(struct collapse_context *c, struc c->collapse("Collapse PTE table with single PTE mapping compound page", p, 1, ops, true); validate_memory(p, 0, page_size); +skip: ops->cleanup_area(p, hpage_pmd_size); } @@ -1038,9 +1310,70 @@ static void madvise_collapse_existing_thps(struct collapse_context *c, ops->cleanup_area(p, hpage_pmd_size); } +static void usage(void) +{ + fprintf(stderr, "\nUsage: ./khugepaged [dir]\n\n"); + fprintf(stderr, "\t\t: :\n"); + fprintf(stderr, "\t\t: [all|khugepaged|madvise]\n"); + fprintf(stderr, "\t\t: [all|anon|file]\n"); + fprintf(stderr, "\n\t\"file,all\" mem_type requires [dir] argument\n"); + fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n"); + fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n"); + fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n"); + fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n"); + exit(1); +} + +static void parse_test_type(int argc, const char **argv) +{ + char *buf; + const char *token; + + if (argc == 1) { + /* Backwards compatibility */ + khugepaged_context = &__khugepaged_context; + madvise_context = &__madvise_context; + anon_ops = &__anon_ops; + return; + } + + buf = strdup(argv[1]); + token = strsep(&buf, ":"); + + if (!strcmp(token, "all")) { + khugepaged_context = &__khugepaged_context; + madvise_context = &__madvise_context; + } else if (!strcmp(token, "khugepaged")) { + khugepaged_context = &__khugepaged_context; + } else if (!strcmp(token, "madvise")) { + madvise_context = &__madvise_context; + } else { + usage(); + } + + if (!buf) + usage(); + + if (!strcmp(buf, "all")) { + file_ops = &__file_ops; + anon_ops = &__anon_ops; + } else if (!strcmp(buf, "anon")) { + anon_ops = &__anon_ops; + } else if (!strcmp(buf, "file")) { + file_ops = &__file_ops; + } else { + usage(); + } + + if (!file_ops) + return; + + if (argc != 3) + usage(); +} + int main(int argc, const char **argv) { - struct collapse_context c; struct settings default_settings = { .thp_enabled = THP_MADVISE, .thp_defrag = THP_DEFRAG_ALWAYS, @@ -1051,8 +1384,20 @@ int main(int argc, const char **argv) .alloc_sleep_millisecs = 10, .scan_sleep_millisecs = 10, }, + /* + * When testing file-backed memory, the collapse path + * looks at how many pages are found in the page cache, not + * what pages are mapped. Disable read ahead optimization so + * pages don't find their way into the page cache unless + * we mem_ops->fault() them in. + */ + .read_ahead_kb = 0, }; - const char *tests = argc == 1 ? "all" : argv[1]; + + parse_test_type(argc, argv); + + if (file_ops) + get_finfo(argv[2]); setbuf(stdout, NULL); @@ -1070,43 +1415,61 @@ int main(int argc, const char **argv) alloc_at_fault(); - if (!strcmp(tests, "khugepaged") || !strcmp(tests, "all")) { - printf("\n*** Testing context: khugepaged ***\n"); - c.collapse = &khugepaged_collapse; - c.enforce_pte_scan_limits = true; - - collapse_full(&c, &anon_ops); - collapse_empty(&c, &anon_ops); - collapse_single_pte_entry(&c, &anon_ops); - collapse_max_ptes_none(&c, &anon_ops); - collapse_swapin_single_pte(&c, &anon_ops); - collapse_max_ptes_swap(&c, &anon_ops); - collapse_single_pte_entry_compound(&c, &anon_ops); - collapse_full_of_compound(&c, &anon_ops); - collapse_compound_extreme(&c, &anon_ops); - collapse_fork(&c, &anon_ops); - collapse_fork_compound(&c, &anon_ops); - collapse_max_ptes_shared(&c, &anon_ops); - } - if (!strcmp(tests, "madvise") || !strcmp(tests, "all")) { - printf("\n*** Testing context: madvise ***\n"); - c.collapse = &madvise_collapse; - c.enforce_pte_scan_limits = false; - - collapse_full(&c, &anon_ops); - collapse_empty(&c, &anon_ops); - collapse_single_pte_entry(&c, &anon_ops); - collapse_max_ptes_none(&c, &anon_ops); - collapse_swapin_single_pte(&c, &anon_ops); - collapse_max_ptes_swap(&c, &anon_ops); - collapse_single_pte_entry_compound(&c, &anon_ops); - collapse_full_of_compound(&c, &anon_ops); - collapse_compound_extreme(&c, &anon_ops); - collapse_fork(&c, &anon_ops); - collapse_fork_compound(&c, &anon_ops); - collapse_max_ptes_shared(&c, &anon_ops); - madvise_collapse_existing_thps(&c, &anon_ops); - } +#define TEST(t, c, o) do { \ + if (c && o) { \ + printf("\nRun test: " #t " (%s:%s)\n", c->name, o->name); \ + t(c, o); \ + } \ + } while (0) + + TEST(collapse_full, khugepaged_context, anon_ops); + TEST(collapse_full, khugepaged_context, file_ops); + TEST(collapse_full, madvise_context, anon_ops); + TEST(collapse_full, madvise_context, file_ops); + + TEST(collapse_empty, khugepaged_context, anon_ops); + TEST(collapse_empty, madvise_context, anon_ops); + + TEST(collapse_single_pte_entry, khugepaged_context, anon_ops); + TEST(collapse_single_pte_entry, khugepaged_context, file_ops); + TEST(collapse_single_pte_entry, madvise_context, anon_ops); + TEST(collapse_single_pte_entry, madvise_context, file_ops); + + TEST(collapse_max_ptes_none, khugepaged_context, anon_ops); + TEST(collapse_max_ptes_none, khugepaged_context, file_ops); + TEST(collapse_max_ptes_none, madvise_context, anon_ops); + TEST(collapse_max_ptes_none, madvise_context, file_ops); + + TEST(collapse_single_pte_entry_compound, khugepaged_context, anon_ops); + TEST(collapse_single_pte_entry_compound, khugepaged_context, file_ops); + TEST(collapse_single_pte_entry_compound, madvise_context, anon_ops); + TEST(collapse_single_pte_entry_compound, madvise_context, file_ops); + + TEST(collapse_full_of_compound, khugepaged_context, anon_ops); + TEST(collapse_full_of_compound, khugepaged_context, file_ops); + TEST(collapse_full_of_compound, madvise_context, anon_ops); + TEST(collapse_full_of_compound, madvise_context, file_ops); + + TEST(collapse_compound_extreme, khugepaged_context, anon_ops); + TEST(collapse_compound_extreme, madvise_context, anon_ops); + + TEST(collapse_swapin_single_pte, khugepaged_context, anon_ops); + TEST(collapse_swapin_single_pte, madvise_context, anon_ops); + + TEST(collapse_max_ptes_swap, khugepaged_context, anon_ops); + TEST(collapse_max_ptes_swap, madvise_context, anon_ops); + + TEST(collapse_fork, khugepaged_context, anon_ops); + TEST(collapse_fork, madvise_context, anon_ops); + + TEST(collapse_fork_compound, khugepaged_context, anon_ops); + TEST(collapse_fork_compound, madvise_context, anon_ops); + + TEST(collapse_max_ptes_shared, khugepaged_context, anon_ops); + TEST(collapse_max_ptes_shared, madvise_context, anon_ops); + + TEST(madvise_collapse_existing_thps, madvise_context, anon_ops); + TEST(madvise_collapse_existing_thps, madvise_context, file_ops); restore_settings(0); } diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c index 9dae51b8219f..f11f8adda521 100644 --- a/tools/testing/selftests/vm/vm_util.c +++ b/tools/testing/selftests/vm/vm_util.c @@ -114,3 +114,13 @@ bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size) { return __check_huge(addr, "AnonHugePages: ", nr_hpages, hpage_size); } + +bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size) +{ + return __check_huge(addr, "FilePmdMapped:", nr_hpages, hpage_size); +} + +bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size) +{ + return __check_huge(addr, "ShmemPmdMapped:", nr_hpages, hpage_size); +} diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h index 8434ea0c95cd..5c35de454e08 100644 --- a/tools/testing/selftests/vm/vm_util.h +++ b/tools/testing/selftests/vm/vm_util.h @@ -8,3 +8,5 @@ void clear_softdirty(void); bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len); uint64_t read_pmd_pagesize(void); bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size); +bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size); +bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size); -- cgit v1.2.3 From d0d35b6010a8bcc12b986f51d29cf3a8635cdbb4 Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Thu, 22 Sep 2022 15:40:44 -0700 Subject: selftests/vm: add thp collapse shmem testing Add memory operations for shmem (memfd) memory, and reuse existing tests with the new memory operations. Shmem tests can be called with "shmem" mem_type, and shmem tests are ran with "all" mem_type as well. Link: https://lkml.kernel.org/r/20220907144521.3115321-9-zokeefe@google.com Link: https://lkml.kernel.org/r/20220922224046.1143204-9-zokeefe@google.com Signed-off-by: Zach O'Keefe Cc: Axel Rasmussen Cc: Chris Kennelly Cc: David Hildenbrand Cc: David Rientjes Cc: Hugh Dickins Cc: James Houghton Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Minchan Kim Cc: Pasha Tatashin Cc: Peter Xu Cc: Rongwei Wang Cc: SeongJae Park Cc: Song Liu Cc: Vlastimil Babka Cc: Yang Shi Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/khugepaged.c | 57 +++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c index 08de6141c2af..eabbd2710096 100644 --- a/tools/testing/selftests/vm/khugepaged.c +++ b/tools/testing/selftests/vm/khugepaged.c @@ -59,6 +59,7 @@ struct mem_ops { static struct mem_ops *file_ops; static struct mem_ops *anon_ops; +static struct mem_ops *shmem_ops; struct collapse_context { void (*collapse)(const char *msg, char *p, int nr_hpages, @@ -739,6 +740,40 @@ static bool file_check_huge(void *addr, int nr_hpages) } } +static void *shmem_setup_area(int nr_hpages) +{ + void *p; + unsigned long size = nr_hpages * hpage_pmd_size; + + finfo.fd = memfd_create("khugepaged-selftest-collapse-shmem", 0); + if (finfo.fd < 0) { + perror("memfd_create()"); + exit(EXIT_FAILURE); + } + if (ftruncate(finfo.fd, size)) { + perror("ftruncate()"); + exit(EXIT_FAILURE); + } + p = mmap(BASE_ADDR, size, PROT_READ | PROT_WRITE, MAP_SHARED, finfo.fd, + 0); + if (p != BASE_ADDR) { + perror("mmap()"); + exit(EXIT_FAILURE); + } + return p; +} + +static void shmem_cleanup_area(void *p, unsigned long size) +{ + munmap(p, size); + close(finfo.fd); +} + +static bool shmem_check_huge(void *addr, int nr_hpages) +{ + return check_huge_shmem(addr, nr_hpages, hpage_pmd_size); +} + static struct mem_ops __anon_ops = { .setup_area = &anon_setup_area, .cleanup_area = &anon_cleanup_area, @@ -755,6 +790,14 @@ static struct mem_ops __file_ops = { .name = "file", }; +static struct mem_ops __shmem_ops = { + .setup_area = &shmem_setup_area, + .cleanup_area = &shmem_cleanup_area, + .fault = &anon_fault, + .check_huge = &shmem_check_huge, + .name = "shmem", +}; + static void __madvise_collapse(const char *msg, char *p, int nr_hpages, struct mem_ops *ops, bool expect) { @@ -1315,7 +1358,7 @@ static void usage(void) fprintf(stderr, "\nUsage: ./khugepaged [dir]\n\n"); fprintf(stderr, "\t\t: :\n"); fprintf(stderr, "\t\t: [all|khugepaged|madvise]\n"); - fprintf(stderr, "\t\t: [all|anon|file]\n"); + fprintf(stderr, "\t\t: [all|anon|file|shmem]\n"); fprintf(stderr, "\n\t\"file,all\" mem_type requires [dir] argument\n"); fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n"); fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n"); @@ -1357,10 +1400,13 @@ static void parse_test_type(int argc, const char **argv) if (!strcmp(buf, "all")) { file_ops = &__file_ops; anon_ops = &__anon_ops; + shmem_ops = &__shmem_ops; } else if (!strcmp(buf, "anon")) { anon_ops = &__anon_ops; } else if (!strcmp(buf, "file")) { file_ops = &__file_ops; + } else if (!strcmp(buf, "shmem")) { + shmem_ops = &__shmem_ops; } else { usage(); } @@ -1377,7 +1423,7 @@ int main(int argc, const char **argv) struct settings default_settings = { .thp_enabled = THP_MADVISE, .thp_defrag = THP_DEFRAG_ALWAYS, - .shmem_enabled = SHMEM_NEVER, + .shmem_enabled = SHMEM_ADVISE, .use_zero_page = 0, .khugepaged = { .defrag = 1, @@ -1424,16 +1470,20 @@ int main(int argc, const char **argv) TEST(collapse_full, khugepaged_context, anon_ops); TEST(collapse_full, khugepaged_context, file_ops); + TEST(collapse_full, khugepaged_context, shmem_ops); TEST(collapse_full, madvise_context, anon_ops); TEST(collapse_full, madvise_context, file_ops); + TEST(collapse_full, madvise_context, shmem_ops); TEST(collapse_empty, khugepaged_context, anon_ops); TEST(collapse_empty, madvise_context, anon_ops); TEST(collapse_single_pte_entry, khugepaged_context, anon_ops); TEST(collapse_single_pte_entry, khugepaged_context, file_ops); + TEST(collapse_single_pte_entry, khugepaged_context, shmem_ops); TEST(collapse_single_pte_entry, madvise_context, anon_ops); TEST(collapse_single_pte_entry, madvise_context, file_ops); + TEST(collapse_single_pte_entry, madvise_context, shmem_ops); TEST(collapse_max_ptes_none, khugepaged_context, anon_ops); TEST(collapse_max_ptes_none, khugepaged_context, file_ops); @@ -1447,8 +1497,10 @@ int main(int argc, const char **argv) TEST(collapse_full_of_compound, khugepaged_context, anon_ops); TEST(collapse_full_of_compound, khugepaged_context, file_ops); + TEST(collapse_full_of_compound, khugepaged_context, shmem_ops); TEST(collapse_full_of_compound, madvise_context, anon_ops); TEST(collapse_full_of_compound, madvise_context, file_ops); + TEST(collapse_full_of_compound, madvise_context, shmem_ops); TEST(collapse_compound_extreme, khugepaged_context, anon_ops); TEST(collapse_compound_extreme, madvise_context, anon_ops); @@ -1470,6 +1522,7 @@ int main(int argc, const char **argv) TEST(madvise_collapse_existing_thps, madvise_context, anon_ops); TEST(madvise_collapse_existing_thps, madvise_context, file_ops); + TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops); restore_settings(0); } -- cgit v1.2.3 From 69d9428ce97f28eb1ba8acee552cf46014663d2b Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Thu, 22 Sep 2022 15:40:45 -0700 Subject: selftests/vm: add file/shmem MADV_COLLAPSE selftest for cleared pmd This test tests that MADV_COLLAPSE acting on file/shmem memory for which (1) the file extent mapping by the memory is already a huge page in the page cache, and (2) the pmd mapping this memory in the target process is none. In practice, (1)+(2) is the state left over after khugepaged has successfully collapsed file/shmem memory for a target VMA, but the memory has not yet been refaulted. So, this test in-effect tests MADV_COLLAPSE racing with khugepaged to collapse the memory first. Link: https://lkml.kernel.org/r/20220907144521.3115321-10-zokeefe@google.com Link: https://lkml.kernel.org/r/20220922224046.1143204-10-zokeefe@google.com Signed-off-by: Zach O'Keefe Cc: Axel Rasmussen Cc: Chris Kennelly Cc: David Hildenbrand Cc: David Rientjes Cc: Hugh Dickins Cc: James Houghton Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Minchan Kim Cc: Pasha Tatashin Cc: Peter Xu Cc: Rongwei Wang Cc: SeongJae Park Cc: Song Liu Cc: Vlastimil Babka Cc: Yang Shi Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/khugepaged.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c index eabbd2710096..64126c8cd561 100644 --- a/tools/testing/selftests/vm/khugepaged.c +++ b/tools/testing/selftests/vm/khugepaged.c @@ -1353,6 +1353,33 @@ static void madvise_collapse_existing_thps(struct collapse_context *c, ops->cleanup_area(p, hpage_pmd_size); } +/* + * Test race with khugepaged where page tables have been retracted and + * pmd cleared. + */ +static void madvise_retracted_page_tables(struct collapse_context *c, + struct mem_ops *ops) +{ + void *p; + int nr_hpages = 1; + unsigned long size = nr_hpages * hpage_pmd_size; + + p = ops->setup_area(nr_hpages); + ops->fault(p, 0, size); + + /* Let khugepaged collapse and leave pmd cleared */ + if (wait_for_scan("Collapse and leave PMD cleared", p, nr_hpages, + ops)) { + fail("Timeout"); + return; + } + success("OK"); + c->collapse("Install huge PMD from page cache", p, nr_hpages, ops, + true); + validate_memory(p, 0, size); + ops->cleanup_area(p, size); +} + static void usage(void) { fprintf(stderr, "\nUsage: ./khugepaged [dir]\n\n"); @@ -1524,5 +1551,8 @@ int main(int argc, const char **argv) TEST(madvise_collapse_existing_thps, madvise_context, file_ops); TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops); + TEST(madvise_retracted_page_tables, madvise_context, file_ops); + TEST(madvise_retracted_page_tables, madvise_context, shmem_ops); + restore_settings(0); } -- cgit v1.2.3 From 0f633baac0f1716200bbccc6430b6006d103d7b9 Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Thu, 22 Sep 2022 15:40:46 -0700 Subject: selftests/vm: add selftest for MADV_COLLAPSE of uffd-minor memory Add :collapse mod to userfaultfd selftest. Currently this mod is only valid for "shmem" test type, but could be used for other test types. When provided, memory allocated by ->allocate_area() will be hugepage-aligned enforced to be hugepage-sized. userfaultf_minor_test, after the UFFD-registered mapping has been populated by UUFD minor fault handler, attempt to MADV_COLLAPSE the UFFD-registered mapping to collapse the memory into a pmd-mapped THP. This test is meant to be a functional test of what occurs during UFFD-driven live migration of VMs backed by huge tmpfs where, after a hugepage-sized region has been successfully migrated (in native page-sized chunks, to avoid latency of fetched a hugepage over the network), we want to reclaim previous VM performance by remapping it at the PMD level. Link: https://lkml.kernel.org/r/20220907144521.3115321-11-zokeefe@google.com Link: https://lkml.kernel.org/r/20220922224046.1143204-11-zokeefe@google.com Signed-off-by: Zach O'Keefe Cc: Axel Rasmussen Cc: Chris Kennelly Cc: David Hildenbrand Cc: David Rientjes Cc: Hugh Dickins Cc: James Houghton Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Minchan Kim Cc: Pasha Tatashin Cc: Peter Xu Cc: Rongwei Wang Cc: SeongJae Park Cc: Song Liu Cc: Vlastimil Babka Cc: Yang Shi Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/userfaultfd.c | 171 ++++++++++++++++++++++++------- 2 files changed, 134 insertions(+), 38 deletions(-) (limited to 'tools/testing/selftests') diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index c9c0996c122b..c687533374e6 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -99,6 +99,7 @@ $(OUTPUT)/khugepaged: vm_util.c $(OUTPUT)/madv_populate: vm_util.c $(OUTPUT)/soft-dirty: vm_util.c $(OUTPUT)/split_huge_page_test: vm_util.c +$(OUTPUT)/userfaultfd: vm_util.c ifeq ($(MACHINE),x86_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 7be709d9eed0..74babdbc02e5 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -61,10 +61,11 @@ #include #include "../kselftest.h" +#include "vm_util.h" #ifdef __NR_userfaultfd -static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; +static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size, hpage_size; #define BOUNCE_RANDOM (1<<0) #define BOUNCE_RACINGFAULTS (1<<1) @@ -79,6 +80,8 @@ static int test_type; #define UFFD_FLAGS (O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY) +#define BASE_PMD_ADDR ((void *)(1UL << 30)) + /* test using /dev/userfaultfd, instead of userfaultfd(2) */ static bool test_dev_userfaultfd; @@ -97,9 +100,10 @@ static int huge_fd; static unsigned long long *count_verify; static int uffd = -1; static int uffd_flags, finished, *pipefd; -static char *area_src, *area_src_alias, *area_dst, *area_dst_alias; +static char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; static char *zeropage; pthread_attr_t attr; +static bool test_collapse; /* Userfaultfd test statistics */ struct uffd_stats { @@ -127,6 +131,8 @@ struct uffd_stats { #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) +#define factor_of_2(x) ((x) ^ ((x) & ((x) - 1))) + const char *examples = "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" "./userfaultfd anon 100 99999\n\n" @@ -152,6 +158,8 @@ static void usage(void) "Supported mods:\n"); fprintf(stderr, "\tsyscall - Use userfaultfd(2) (default)\n"); fprintf(stderr, "\tdev - Use /dev/userfaultfd instead of userfaultfd(2)\n"); + fprintf(stderr, "\tcollapse - Test MADV_COLLAPSE of UFFDIO_REGISTER_MODE_MINOR\n" + "memory\n"); fprintf(stderr, "\nExample test mod usage:\n"); fprintf(stderr, "# Run anonymous memory test with /dev/userfaultfd:\n"); fprintf(stderr, "./userfaultfd anon:dev 100 99999\n\n"); @@ -229,12 +237,10 @@ static void anon_release_pages(char *rel_area) err("madvise(MADV_DONTNEED) failed"); } -static void anon_allocate_area(void **alloc_area) +static void anon_allocate_area(void **alloc_area, bool is_src) { *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (*alloc_area == MAP_FAILED) - err("mmap of anonymous memory failed"); } static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) @@ -252,7 +258,7 @@ static void hugetlb_release_pages(char *rel_area) } } -static void hugetlb_allocate_area(void **alloc_area) +static void hugetlb_allocate_area(void **alloc_area, bool is_src) { void *area_alias = NULL; char **alloc_area_alias; @@ -262,7 +268,7 @@ static void hugetlb_allocate_area(void **alloc_area) nr_pages * page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | - (*alloc_area == area_src ? 0 : MAP_NORESERVE), + (is_src ? 0 : MAP_NORESERVE), -1, 0); else @@ -270,9 +276,9 @@ static void hugetlb_allocate_area(void **alloc_area) nr_pages * page_size, PROT_READ | PROT_WRITE, MAP_SHARED | - (*alloc_area == area_src ? 0 : MAP_NORESERVE), + (is_src ? 0 : MAP_NORESERVE), huge_fd, - *alloc_area == area_src ? 0 : nr_pages * page_size); + is_src ? 0 : nr_pages * page_size); if (*alloc_area == MAP_FAILED) err("mmap of hugetlbfs file failed"); @@ -282,12 +288,12 @@ static void hugetlb_allocate_area(void **alloc_area) PROT_READ | PROT_WRITE, MAP_SHARED, huge_fd, - *alloc_area == area_src ? 0 : nr_pages * page_size); + is_src ? 0 : nr_pages * page_size); if (area_alias == MAP_FAILED) err("mmap of hugetlb file alias failed"); } - if (*alloc_area == area_src) { + if (is_src) { alloc_area_alias = &area_src_alias; } else { alloc_area_alias = &area_dst_alias; @@ -310,21 +316,36 @@ static void shmem_release_pages(char *rel_area) err("madvise(MADV_REMOVE) failed"); } -static void shmem_allocate_area(void **alloc_area) +static void shmem_allocate_area(void **alloc_area, bool is_src) { void *area_alias = NULL; - bool is_src = alloc_area == (void **)&area_src; - unsigned long offset = is_src ? 0 : nr_pages * page_size; + size_t bytes = nr_pages * page_size; + unsigned long offset = is_src ? 0 : bytes; + char *p = NULL, *p_alias = NULL; + + if (test_collapse) { + p = BASE_PMD_ADDR; + if (!is_src) + /* src map + alias + interleaved hpages */ + p += 2 * (bytes + hpage_size); + p_alias = p; + p_alias += bytes; + p_alias += hpage_size; /* Prevent src/dst VMA merge */ + } - *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - MAP_SHARED, shm_fd, offset); + *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, + shm_fd, offset); if (*alloc_area == MAP_FAILED) err("mmap of memfd failed"); + if (test_collapse && *alloc_area != p) + err("mmap of memfd failed at %p", p); - area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - MAP_SHARED, shm_fd, offset); + area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, + shm_fd, offset); if (area_alias == MAP_FAILED) err("mmap of memfd alias failed"); + if (test_collapse && area_alias != p_alias) + err("mmap of anonymous memory failed at %p", p_alias); if (is_src) area_src_alias = area_alias; @@ -337,28 +358,39 @@ static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset) *start = (unsigned long)area_dst_alias + offset; } +static void shmem_check_pmd_mapping(void *p, int expect_nr_hpages) +{ + if (!check_huge_shmem(area_dst_alias, expect_nr_hpages, hpage_size)) + err("Did not find expected %d number of hugepages", + expect_nr_hpages); +} + struct uffd_test_ops { - void (*allocate_area)(void **alloc_area); + void (*allocate_area)(void **alloc_area, bool is_src); void (*release_pages)(char *rel_area); void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset); + void (*check_pmd_mapping)(void *p, int expect_nr_hpages); }; static struct uffd_test_ops anon_uffd_test_ops = { .allocate_area = anon_allocate_area, .release_pages = anon_release_pages, .alias_mapping = noop_alias_mapping, + .check_pmd_mapping = NULL, }; static struct uffd_test_ops shmem_uffd_test_ops = { .allocate_area = shmem_allocate_area, .release_pages = shmem_release_pages, .alias_mapping = shmem_alias_mapping, + .check_pmd_mapping = shmem_check_pmd_mapping, }; static struct uffd_test_ops hugetlb_uffd_test_ops = { .allocate_area = hugetlb_allocate_area, .release_pages = hugetlb_release_pages, .alias_mapping = hugetlb_alias_mapping, + .check_pmd_mapping = NULL, }; static struct uffd_test_ops *uffd_test_ops; @@ -478,6 +510,7 @@ static void uffd_test_ctx_clear(void) munmap_area((void **)&area_src_alias); munmap_area((void **)&area_dst); munmap_area((void **)&area_dst_alias); + munmap_area((void **)&area_remap); } static void uffd_test_ctx_init(uint64_t features) @@ -486,8 +519,8 @@ static void uffd_test_ctx_init(uint64_t features) uffd_test_ctx_clear(); - uffd_test_ops->allocate_area((void **)&area_src); - uffd_test_ops->allocate_area((void **)&area_dst); + uffd_test_ops->allocate_area((void **)&area_src, true); + uffd_test_ops->allocate_area((void **)&area_dst, false); userfaultfd_open(&features); @@ -804,6 +837,7 @@ static void *uffd_poll_thread(void *arg) err("remove failure"); break; case UFFD_EVENT_REMAP: + area_remap = area_dst; /* save for later unmap */ area_dst = (char *)(unsigned long)msg.arg.remap.to; break; } @@ -1256,13 +1290,30 @@ static int userfaultfd_sig_test(void) return userfaults != 0; } +void check_memory_contents(char *p) +{ + unsigned long i; + uint8_t expected_byte; + void *expected_page; + + if (posix_memalign(&expected_page, page_size, page_size)) + err("out of memory"); + + for (i = 0; i < nr_pages; ++i) { + expected_byte = ~((uint8_t)(i % ((uint8_t)-1))); + memset(expected_page, expected_byte, page_size); + if (my_bcmp(expected_page, p + (i * page_size), page_size)) + err("unexpected page contents after minor fault"); + } + + free(expected_page); +} + static int userfaultfd_minor_test(void) { - struct uffdio_register uffdio_register; unsigned long p; + struct uffdio_register uffdio_register; pthread_t uffd_mon; - uint8_t expected_byte; - void *expected_page; char c; struct uffd_stats stats = { 0 }; @@ -1301,17 +1352,7 @@ static int userfaultfd_minor_test(void) * fault. uffd_poll_thread will resolve the fault by bit-flipping the * page's contents, and then issuing a CONTINUE ioctl. */ - - if (posix_memalign(&expected_page, page_size, page_size)) - err("out of memory"); - - for (p = 0; p < nr_pages; ++p) { - expected_byte = ~((uint8_t)(p % ((uint8_t)-1))); - memset(expected_page, expected_byte, page_size); - if (my_bcmp(expected_page, area_dst_alias + (p * page_size), - page_size)) - err("unexpected page contents after minor fault"); - } + check_memory_contents(area_dst_alias); if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); @@ -1320,6 +1361,23 @@ static int userfaultfd_minor_test(void) uffd_stats_report(&stats, 1); + if (test_collapse) { + printf("testing collapse of uffd memory into PMD-mapped THPs:"); + if (madvise(area_dst_alias, nr_pages * page_size, + MADV_COLLAPSE)) + err("madvise(MADV_COLLAPSE)"); + + uffd_test_ops->check_pmd_mapping(area_dst, + nr_pages * page_size / + hpage_size); + /* + * This won't cause uffd-fault - it purely just makes sure there + * was no corruption. + */ + check_memory_contents(area_dst_alias); + printf(" done.\n"); + } + return stats.missing_faults != 0 || stats.minor_faults != nr_pages; } @@ -1656,6 +1714,8 @@ static void parse_test_type_arg(const char *raw_type) test_dev_userfaultfd = true; else if (!strcmp(token, "syscall")) test_dev_userfaultfd = false; + else if (!strcmp(token, "collapse")) + test_collapse = true; else err("unrecognized test mod '%s'", token); } @@ -1663,8 +1723,11 @@ static void parse_test_type_arg(const char *raw_type) if (!test_type) err("failed to parse test type argument: '%s'", raw_type); + if (test_collapse && test_type != TEST_SHMEM) + err("Unsupported test: %s", raw_type); + if (test_type == TEST_HUGETLB) - page_size = default_huge_page_size(); + page_size = hpage_size; else page_size = sysconf(_SC_PAGE_SIZE); @@ -1702,6 +1765,8 @@ static void sigalrm(int sig) int main(int argc, char **argv) { + size_t bytes; + if (argc < 4) usage(); @@ -1709,11 +1774,41 @@ int main(int argc, char **argv) err("failed to arm SIGALRM"); alarm(ALARM_INTERVAL_SECS); + hpage_size = default_huge_page_size(); parse_test_type_arg(argv[1]); + bytes = atol(argv[2]) * 1024 * 1024; + + if (test_collapse && bytes & (hpage_size - 1)) + err("MiB must be multiple of %lu if :collapse mod set", + hpage_size >> 20); nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size / - nr_cpus; + + if (test_collapse) { + /* nr_cpus must divide (bytes / page_size), otherwise, + * area allocations of (nr_pages * paze_size) won't be a + * multiple of hpage_size, even if bytes is a multiple of + * hpage_size. + * + * This means that nr_cpus must divide (N * (2 << (H-P)) + * where: + * bytes = hpage_size * N + * hpage_size = 2 << H + * page_size = 2 << P + * + * And we want to chose nr_cpus to be the largest value + * satisfying this constraint, not larger than the number + * of online CPUs. Unfortunately, prime factorization of + * N and nr_cpus may be arbitrary, so have to search for it. + * Instead, just use the highest power of 2 dividing both + * nr_cpus and (bytes / page_size). + */ + int x = factor_of_2(nr_cpus); + int y = factor_of_2(bytes / page_size); + + nr_cpus = x < y ? x : y; + } + nr_pages_per_cpu = bytes / page_size / nr_cpus; if (!nr_pages_per_cpu) { _err("invalid MiB"); usage(); -- cgit v1.2.3 From e55b9f96860f6c6026cff97966a740576285e07b Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 26 Sep 2022 09:57:04 -0400 Subject: mm: memcontrol: drop dead CONFIG_MEMCG_SWAP config symbol Since 2d1c498072de ("mm: memcontrol: make swap tracking an integral part of memory control"), CONFIG_MEMCG_SWAP hasn't been a user-visible config option anymore, it just means CONFIG_MEMCG && CONFIG_SWAP. Update the sites accordingly and drop the symbol. [ While touching the docs, remove two references to CONFIG_MEMCG_KMEM, which hasn't been a user-visible symbol for over half a decade. ] Link: https://lkml.kernel.org/r/20220926135704.400818-5-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Shakeel Butt Cc: Hugh Dickins Cc: Michal Hocko Cc: Roman Gushchin Signed-off-by: Andrew Morton --- Documentation/admin-guide/cgroup-v1/memory.rst | 4 +--- arch/mips/configs/db1xxx_defconfig | 1 - arch/mips/configs/generic_defconfig | 1 - arch/powerpc/configs/powernv_defconfig | 1 - arch/powerpc/configs/pseries_defconfig | 1 - arch/sh/configs/sdk7786_defconfig | 1 - arch/sh/configs/urquell_defconfig | 1 - include/linux/swap.h | 2 +- include/linux/swap_cgroup.h | 4 ++-- init/Kconfig | 5 ----- mm/Makefile | 4 +++- mm/memcontrol.c | 6 +++--- tools/testing/selftests/cgroup/config | 1 - 13 files changed, 10 insertions(+), 22 deletions(-) (limited to 'tools/testing/selftests') diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst index 2cc502a75ef6..5b86245450bd 100644 --- a/Documentation/admin-guide/cgroup-v1/memory.rst +++ b/Documentation/admin-guide/cgroup-v1/memory.rst @@ -299,7 +299,7 @@ Per-node-per-memcgroup LRU (cgroup's private LRU) is guarded by lruvec->lru_lock; PG_lru bit of page->flags is cleared before isolating a page from its LRU under lruvec->lru_lock. -2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM) +2.7 Kernel Memory Extension ----------------------------------------------- With the Kernel memory extension, the Memory Controller is able to limit @@ -386,8 +386,6 @@ U != 0, K >= U: a. Enable CONFIG_CGROUPS b. Enable CONFIG_MEMCG -c. Enable CONFIG_MEMCG_SWAP (to use swap extension) -d. Enable CONFIG_MEMCG_KMEM (to use kmem extension) 3.1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?) ------------------------------------------------------------------- diff --git a/arch/mips/configs/db1xxx_defconfig b/arch/mips/configs/db1xxx_defconfig index b8bd66300996..83cbdecb27e6 100644 --- a/arch/mips/configs/db1xxx_defconfig +++ b/arch/mips/configs/db1xxx_defconfig @@ -9,7 +9,6 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_CGROUPS=y CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y diff --git a/arch/mips/configs/generic_defconfig b/arch/mips/configs/generic_defconfig index 714169e411cf..48e4e251779b 100644 --- a/arch/mips/configs/generic_defconfig +++ b/arch/mips/configs/generic_defconfig @@ -3,7 +3,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 49f49c263935..4acca5263404 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -17,7 +17,6 @@ CONFIG_LOG_CPU_MAX_BUF_SHIFT=13 CONFIG_NUMA_BALANCING=y CONFIG_CGROUPS=y CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_CGROUP_FREEZER=y CONFIG_CPUSETS=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index b571d084c148..fead14ebb1fc 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -16,7 +16,6 @@ CONFIG_LOG_CPU_MAX_BUF_SHIFT=13 CONFIG_NUMA_BALANCING=y CONFIG_CGROUPS=y CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_CGROUP_FREEZER=y CONFIG_CPUSETS=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index a8662b6927ec..97b7356639ed 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -16,7 +16,6 @@ CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_MEMCG=y -CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig index cb2f56468fe0..be478f3148f2 100644 --- a/arch/sh/configs/urquell_defconfig +++ b/arch/sh/configs/urquell_defconfig @@ -14,7 +14,6 @@ CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_MEMCG=y -CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_DEV_INITRD=y diff --git a/include/linux/swap.h b/include/linux/swap.h index fc8d98660326..a18cf4b7c724 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -666,7 +666,7 @@ static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp) cgroup_throttle_swaprate(&folio->page, gfp); } -#ifdef CONFIG_MEMCG_SWAP +#if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry); int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry); static inline int mem_cgroup_try_charge_swap(struct folio *folio, diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h index a12dd1c3966c..ae73a87775b3 100644 --- a/include/linux/swap_cgroup.h +++ b/include/linux/swap_cgroup.h @@ -4,7 +4,7 @@ #include -#ifdef CONFIG_MEMCG_SWAP +#if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, unsigned short old, unsigned short new); @@ -40,6 +40,6 @@ static inline void swap_cgroup_swapoff(int type) return; } -#endif /* CONFIG_MEMCG_SWAP */ +#endif #endif /* __LINUX_SWAP_CGROUP_H */ diff --git a/init/Kconfig b/init/Kconfig index 532362fcfe31..7d86cf6b3012 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -958,11 +958,6 @@ config MEMCG help Provides control over the memory footprint of tasks in a cgroup. -config MEMCG_SWAP - bool - depends on MEMCG && SWAP - default y - config MEMCG_KMEM bool depends on MEMCG && !SLOB diff --git a/mm/Makefile b/mm/Makefile index cc23b0052584..8e105e5b3e29 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -98,7 +98,9 @@ obj-$(CONFIG_DEVICE_MIGRATION) += migrate_device.o obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o obj-$(CONFIG_PAGE_COUNTER) += page_counter.o obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o -obj-$(CONFIG_MEMCG_SWAP) += swap_cgroup.o +ifdef CONFIG_SWAP +obj-$(CONFIG_MEMCG) += swap_cgroup.o +endif obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o obj-$(CONFIG_GUP_TEST) += gup_test.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 76bb0a18a2f3..61e05fc281fb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3423,7 +3423,7 @@ void split_page_memcg(struct page *head, unsigned int nr) css_get_many(&memcg->css, nr - 1); } -#ifdef CONFIG_MEMCG_SWAP +#ifdef CONFIG_SWAP /** * mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record. * @entry: swap entry to be moved @@ -7296,7 +7296,7 @@ static int __init mem_cgroup_init(void) } subsys_initcall(mem_cgroup_init); -#ifdef CONFIG_MEMCG_SWAP +#ifdef CONFIG_SWAP static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) { while (!refcount_inc_not_zero(&memcg->id.ref)) { @@ -7788,4 +7788,4 @@ static int __init mem_cgroup_swap_init(void) } subsys_initcall(mem_cgroup_swap_init); -#endif /* CONFIG_MEMCG_SWAP */ +#endif /* CONFIG_SWAP */ diff --git a/tools/testing/selftests/cgroup/config b/tools/testing/selftests/cgroup/config index 84fe884fad86..97d549ee894f 100644 --- a/tools/testing/selftests/cgroup/config +++ b/tools/testing/selftests/cgroup/config @@ -4,5 +4,4 @@ CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_SCHED=y CONFIG_MEMCG=y CONFIG_MEMCG_KMEM=y -CONFIG_MEMCG_SWAP=y CONFIG_PAGE_COUNTER=y -- cgit v1.2.3