path: root/mm/migrate.c
author     Linus Torvalds <torvalds@linux-foundation.org>   2022-03-23 00:11:53 +0100
committer  Linus Torvalds <torvalds@linux-foundation.org>   2022-03-23 00:11:53 +0100
commit     3bf03b9a0839c9fb06927ae53ebd0f960b19d408
tree       06114247eb7760edca7b57cc0108a351ffe1971b /mm/migrate.c
parent     Merge tag 'sched-core-2022-03-22' of git://git.kernel.org/pub/scm/linux/kerne...
parent     mm/damon/sysfs: remove repeat container_of() in damon_sysfs_kdamond_release()
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:

 - A few misc subsystems: kthread, scripts, ntfs, ocfs2, block, and vfs

 - Most of the MM patches which precede the patches in Willy's tree:
   kasan, pagecache, gup, swap, shmem, memcg, selftests, pagemap,
   mremap, sparsemem, vmalloc, pagealloc, memory-failure, mlock,
   hugetlb, userfaultfd, vmscan, compaction, mempolicy, oom-kill,
   migration, thp, cma, autonuma, psi, ksm, page-poison, madvise,
   memory-hotplug, rmap, zswap, uaccess, ioremap, highmem, cleanups,
   kfence, hmm, and damon

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (227 commits)
  mm/damon/sysfs: remove repeat container_of() in damon_sysfs_kdamond_release()
  Docs/ABI/testing: add DAMON sysfs interface ABI document
  Docs/admin-guide/mm/damon/usage: document DAMON sysfs interface
  selftests/damon: add a test for DAMON sysfs interface
  mm/damon/sysfs: support DAMOS stats
  mm/damon/sysfs: support DAMOS watermarks
  mm/damon/sysfs: support schemes prioritization
  mm/damon/sysfs: support DAMOS quotas
  mm/damon/sysfs: support DAMON-based Operation Schemes
  mm/damon/sysfs: support the physical address space monitoring
  mm/damon/sysfs: link DAMON for virtual address spaces monitoring
  mm/damon: implement a minimal stub for sysfs-based DAMON interface
  mm/damon/core: add number of each enum type values
  mm/damon/core: allow non-exclusive DAMON start/stop
  Docs/damon: update outdated term 'regions update interval'
  Docs/vm/damon/design: update DAMON-Idle Page Tracking interference handling
  Docs/vm/damon: call low level monitoring primitives the operations
  mm/damon: remove unnecessary CONFIG_DAMON option
  mm/damon/paddr,vaddr: remove damon_{p,v}a_{target_valid,set_operations}()
  mm/damon/dbgfs-test: fix is_target_id() change
  ...
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--  mm/migrate.c | 116
1 file changed, 54 insertions(+), 62 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index c7da064b4781..bc9da3fd01aa 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -51,6 +51,7 @@
#include <linux/oom.h>
#include <linux/memory.h>
#include <linux/random.h>
+#include <linux/sched/sysctl.h>
#include <asm/tlbflush.h>
@@ -107,7 +108,7 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode)
/* Driver shouldn't use PG_isolated bit of page->flags */
WARN_ON_ONCE(PageIsolated(page));
- __SetPageIsolated(page);
+ SetPageIsolated(page);
unlock_page(page);
return 0;
@@ -126,7 +127,7 @@ static void putback_movable_page(struct page *page)
mapping = page_mapping(page);
mapping->a_ops->putback_page(page);
- __ClearPageIsolated(page);
+ ClearPageIsolated(page);
}
/*
@@ -159,7 +160,7 @@ void putback_movable_pages(struct list_head *l)
if (PageMovable(page))
putback_movable_page(page);
else
- __ClearPageIsolated(page);
+ ClearPageIsolated(page);
unlock_page(page);
put_page(page);
} else {
@@ -883,7 +884,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
VM_BUG_ON_PAGE(!PageIsolated(page), page);
if (!PageMovable(page)) {
rc = MIGRATEPAGE_SUCCESS;
- __ClearPageIsolated(page);
+ ClearPageIsolated(page);
goto out;
}
@@ -905,7 +906,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
* We clear PG_movable under page_lock so any compactor
* cannot try to migrate this page.
*/
- __ClearPageIsolated(page);
+ ClearPageIsolated(page);
}
/*
@@ -917,8 +918,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
page->mapping = NULL;
if (likely(!is_zone_device_page(newpage)))
- flush_dcache_page(newpage);
-
+ flush_dcache_folio(page_folio(newpage));
}
out:
return rc;
@@ -1092,7 +1092,7 @@ static int unmap_and_move(new_page_t get_new_page,
if (unlikely(__PageMovable(page))) {
lock_page(page);
if (!PageMovable(page))
- __ClearPageIsolated(page);
+ ClearPageIsolated(page);
unlock_page(page);
}
goto out;
@@ -1351,7 +1351,6 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
bool is_thp = false;
struct page *page;
struct page *page2;
- int swapwrite = current->flags & PF_SWAPWRITE;
int rc, nr_subpages;
LIST_HEAD(ret_pages);
LIST_HEAD(thp_split_pages);
@@ -1360,9 +1359,6 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
trace_mm_migrate_pages_start(mode, reason);
- if (!swapwrite)
- current->flags |= PF_SWAPWRITE;
-
thp_subpage_migration:
for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
retry = 0;
@@ -1517,9 +1513,6 @@ out:
trace_mm_migrate_pages(nr_succeeded, nr_failed_pages, nr_thp_succeeded,
nr_thp_failed, nr_thp_split, mode, reason);
- if (!swapwrite)
- current->flags &= ~PF_SWAPWRITE;
-
if (ret_succeeded)
*ret_succeeded = nr_succeeded;
@@ -1612,7 +1605,6 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
{
struct vm_area_struct *vma;
struct page *page;
- unsigned int follflags;
int err;
mmap_read_lock(mm);
@@ -1622,8 +1614,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
goto out;
/* FOLL_DUMP to ignore special (like zero) pages */
- follflags = FOLL_GET | FOLL_DUMP;
- page = follow_page(vma, addr, follflags);
+ page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
err = PTR_ERR(page);
if (IS_ERR(page))
@@ -1762,6 +1753,13 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
}
/*
+ * The move_pages() man page does not have an -EEXIST choice, so
+ * use -EFAULT instead.
+ */
+ if (err == -EEXIST)
+ err = -EFAULT;
+
+ /*
* If the page is already on the target node (!err), store the
* node, otherwise, store the err.
*/
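
For context on where these per-page status values end up: below is a minimal userspace sketch of move_pages(2) from libnuma (link with -lnuma). The man page lists -EFAULT, but not -EEXIST, among the documented status codes, which is why the hunk above remaps it before storing into the user's status array.

#include <numaif.h>   /* move_pages(), MPOL_MF_MOVE; provided by libnuma */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	void *pages[1];
	int nodes[1] = { 0 };        /* ask the kernel to move the page to node 0 */
	int status[1];

	pages[0] = malloc(4096);
	memset(pages[0], 0, 4096);   /* fault the page in first, or the status
	                              * for an unmapped address comes back as a
	                              * negative errno */

	/* pid 0 means "the calling process"; on success each status[i] holds
	 * the node the page now sits on, or a negative errno such as -EFAULT,
	 * never -EEXIST, per the hunk above. */
	if (move_pages(0, 1, pages, nodes, status, MPOL_MF_MOVE) == 0)
		printf("status[0] = %d\n", status[0]);

	free(pages[0]);
	return 0;
}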
@@ -2034,16 +2032,27 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
{
int page_lru;
int nr_pages = thp_nr_pages(page);
+ int order = compound_order(page);
- VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
+ VM_BUG_ON_PAGE(order && !PageTransHuge(page), page);
/* Do not migrate THP mapped by multiple processes */
if (PageTransHuge(page) && total_mapcount(page) > 1)
return 0;
/* Avoid migrating to a node that is nearly full */
- if (!migrate_balanced_pgdat(pgdat, nr_pages))
+ if (!migrate_balanced_pgdat(pgdat, nr_pages)) {
+ int z;
+
+ if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING))
+ return 0;
+ for (z = pgdat->nr_zones - 1; z >= 0; z--) {
+ if (populated_zone(pgdat->node_zones + z))
+ break;
+ }
+ wakeup_kswapd(pgdat->node_zones + z, 0, order, ZONE_MOVABLE);
return 0;
+ }
if (isolate_lru_page(page))
return 0;
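
The new branch only fires when NUMA balancing runs in memory-tiering mode: instead of simply refusing to promote to a nearly full top-tier node, it wakes kswapd on the highest populated zone so reclaim can demote cold pages and make room. A hedged sketch of flipping that mode on from userspace, assuming the 5.18-era semantics where kernel.numa_balancing is a bitmask and 0x2 is NUMA_BALANCING_MEMORY_TIERING:

#include <stdio.h>

int main(void)
{
	/* 0x2 == NUMA_BALANCING_MEMORY_TIERING (assumed value; check
	 * include/linux/sched/sysctl.h for your kernel). Needs root. */
	FILE *f = fopen("/proc/sys/kernel/numa_balancing", "w");

	if (!f) {
		perror("numa_balancing");
		return 1;
	}
	fprintf(f, "2\n");
	fclose(f);
	return 0;
}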
@@ -2072,6 +2081,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
pg_data_t *pgdat = NODE_DATA(node);
int isolated;
int nr_remaining;
+ unsigned int nr_succeeded;
LIST_HEAD(migratepages);
new_page_t *new;
bool compound;
@@ -2110,7 +2120,8 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
list_add(&page->lru, &migratepages);
nr_remaining = migrate_pages(&migratepages, *new, NULL, node,
- MIGRATE_ASYNC, MR_NUMA_MISPLACED, NULL);
+ MIGRATE_ASYNC, MR_NUMA_MISPLACED,
+ &nr_succeeded);
if (nr_remaining) {
if (!list_empty(&migratepages)) {
list_del(&page->lru);
@@ -2119,8 +2130,13 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
putback_lru_page(page);
}
isolated = 0;
- } else
- count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_pages);
+ }
+ if (nr_succeeded) {
+ count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_succeeded);
+ if (!node_is_toptier(page_to_nid(page)) && node_is_toptier(node))
+ mod_node_page_state(pgdat, PGPROMOTE_SUCCESS,
+ nr_succeeded);
+ }
BUG_ON(!list_empty(&migratepages));
return isolated;
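
The PGPROMOTE_SUCCESS node counter bumped here is exported like any other vmstat item; a small sketch that reads the system-wide aggregate out of /proc/vmstat (the lowercase name pgpromote_success is an assumption based on how vmstat item names are generated):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char name[64];
	unsigned long long val;
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f)
		return 1;
	/* /proc/vmstat is "name value" pairs, one per line */
	while (fscanf(f, "%63s %llu", name, &val) == 2) {
		if (!strcmp(name, "pgpromote_success")) {
			printf("pages promoted to the top tier: %llu\n", val);
			break;
		}
	}
	fclose(f);
	return 0;
}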
@@ -3082,18 +3098,21 @@ static int establish_migrate_target(int node, nodemask_t *used,
if (best_distance != -1) {
val = node_distance(node, migration_target);
if (val > best_distance)
- return NUMA_NO_NODE;
+ goto out_clear;
}
index = nd->nr;
if (WARN_ONCE(index >= DEMOTION_TARGET_NODES,
"Exceeds maximum demotion target nodes\n"))
- return NUMA_NO_NODE;
+ goto out_clear;
nd->nodes[index] = migration_target;
nd->nr++;
return migration_target;
+out_clear:
+ node_clear(migration_target, *used);
+ return NUMA_NO_NODE;
}
/*
@@ -3190,7 +3209,7 @@ again:
/*
* For callers that do not hold get_online_mems() already.
*/
-static void set_migration_target_nodes(void)
+void set_migration_target_nodes(void)
{
get_online_mems();
__set_migration_target_nodes();
@@ -3254,51 +3273,24 @@ static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
return notifier_from_errno(0);
}
-/*
- * React to hotplug events that might affect the migration targets
- * like events that online or offline NUMA nodes.
- *
- * The ordering is also currently dependent on which nodes have
- * CPUs. That means we need CPU on/offline notification too.
- */
-static int migration_online_cpu(unsigned int cpu)
-{
- set_migration_target_nodes();
- return 0;
-}
-
-static int migration_offline_cpu(unsigned int cpu)
-{
- set_migration_target_nodes();
- return 0;
-}
-
-static int __init migrate_on_reclaim_init(void)
+void __init migrate_on_reclaim_init(void)
{
- int ret;
-
node_demotion = kmalloc_array(nr_node_ids,
sizeof(struct demotion_nodes),
GFP_KERNEL);
WARN_ON(!node_demotion);
- ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline",
- NULL, migration_offline_cpu);
+ hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
/*
- * In the unlikely case that this fails, the automatic
- * migration targets may become suboptimal for nodes
- * where N_CPU changes. With such a small impact in a
- * rare case, do not bother trying to do anything special.
+ * At this point, all NUMA nodes with memory/CPUs have their state
+ * properly set, so we can build the demotion order now.
+ * Let us just hold the cpu_hotplug lock, as we could possibly have
+ * CPU hotplug events during boot.
*/
- WARN_ON(ret < 0);
- ret = cpuhp_setup_state(CPUHP_AP_MM_DEMOTION_ONLINE, "mm/demotion:online",
- migration_online_cpu, NULL);
- WARN_ON(ret < 0);
-
- hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
- return 0;
+ cpus_read_lock();
+ set_migration_target_nodes();
+ cpus_read_unlock();
}
-late_initcall(migrate_on_reclaim_init);
#endif /* CONFIG_HOTPLUG_CPU */
bool numa_demotion_enabled = false;
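
numa_demotion_enabled gates whether reclaim may demote cold pages down the demotion order built above. It is toggled through sysfs; the path in this sketch matches the knob that accompanied this code when it was introduced, but treat it as an assumption on other kernel versions:

#include <stdio.h>

#define KNOB "/sys/kernel/mm/numa/demotion_enabled"   /* assumed sysfs path */

int main(void)
{
	char buf[8] = "";
	FILE *f = fopen(KNOB, "r");

	if (!f) {
		perror(KNOB);
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("demotion: %s", buf);   /* prints "true" or "false" */
	fclose(f);

	f = fopen(KNOB, "w");                  /* flipping it requires root */
	if (f) {
		fputs("true\n", f);
		fclose(f);
	}
	return 0;
}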