diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-23 00:11:53 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-23 00:11:53 +0100 |
commit | 3bf03b9a0839c9fb06927ae53ebd0f960b19d408 (patch) | |
tree | 06114247eb7760edca7b57cc0108a351ffe1971b /mm/migrate.c | |
parent | Merge tag 'sched-core-2022-03-22' of git://git.kernel.org/pub/scm/linux/kerne... (diff) | |
parent | mm/damon/sysfs: remove repeat container_of() in damon_sysfs_kdamond_release() (diff) | |
download | linux-3bf03b9a0839c9fb06927ae53ebd0f960b19d408.tar.xz linux-3bf03b9a0839c9fb06927ae53ebd0f960b19d408.zip |
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
- A few misc subsystems: kthread, scripts, ntfs, ocfs2, block, and vfs
- Most the MM patches which precede the patches in Willy's tree: kasan,
pagecache, gup, swap, shmem, memcg, selftests, pagemap, mremap,
sparsemem, vmalloc, pagealloc, memory-failure, mlock, hugetlb,
userfaultfd, vmscan, compaction, mempolicy, oom-kill, migration, thp,
cma, autonuma, psi, ksm, page-poison, madvise, memory-hotplug, rmap,
zswap, uaccess, ioremap, highmem, cleanups, kfence, hmm, and damon.
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (227 commits)
mm/damon/sysfs: remove repeat container_of() in damon_sysfs_kdamond_release()
Docs/ABI/testing: add DAMON sysfs interface ABI document
Docs/admin-guide/mm/damon/usage: document DAMON sysfs interface
selftests/damon: add a test for DAMON sysfs interface
mm/damon/sysfs: support DAMOS stats
mm/damon/sysfs: support DAMOS watermarks
mm/damon/sysfs: support schemes prioritization
mm/damon/sysfs: support DAMOS quotas
mm/damon/sysfs: support DAMON-based Operation Schemes
mm/damon/sysfs: support the physical address space monitoring
mm/damon/sysfs: link DAMON for virtual address spaces monitoring
mm/damon: implement a minimal stub for sysfs-based DAMON interface
mm/damon/core: add number of each enum type values
mm/damon/core: allow non-exclusive DAMON start/stop
Docs/damon: update outdated term 'regions update interval'
Docs/vm/damon/design: update DAMON-Idle Page Tracking interference handling
Docs/vm/damon: call low level monitoring primitives the operations
mm/damon: remove unnecessary CONFIG_DAMON option
mm/damon/paddr,vaddr: remove damon_{p,v}a_{target_valid,set_operations}()
mm/damon/dbgfs-test: fix is_target_id() change
...
Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- | mm/migrate.c | 116 |
1 files changed, 54 insertions, 62 deletions
diff --git a/mm/migrate.c b/mm/migrate.c index c7da064b4781..bc9da3fd01aa 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -51,6 +51,7 @@ #include <linux/oom.h> #include <linux/memory.h> #include <linux/random.h> +#include <linux/sched/sysctl.h> #include <asm/tlbflush.h> @@ -107,7 +108,7 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode) /* Driver shouldn't use PG_isolated bit of page->flags */ WARN_ON_ONCE(PageIsolated(page)); - __SetPageIsolated(page); + SetPageIsolated(page); unlock_page(page); return 0; @@ -126,7 +127,7 @@ static void putback_movable_page(struct page *page) mapping = page_mapping(page); mapping->a_ops->putback_page(page); - __ClearPageIsolated(page); + ClearPageIsolated(page); } /* @@ -159,7 +160,7 @@ void putback_movable_pages(struct list_head *l) if (PageMovable(page)) putback_movable_page(page); else - __ClearPageIsolated(page); + ClearPageIsolated(page); unlock_page(page); put_page(page); } else { @@ -883,7 +884,7 @@ static int move_to_new_page(struct page *newpage, struct page *page, VM_BUG_ON_PAGE(!PageIsolated(page), page); if (!PageMovable(page)) { rc = MIGRATEPAGE_SUCCESS; - __ClearPageIsolated(page); + ClearPageIsolated(page); goto out; } @@ -905,7 +906,7 @@ static int move_to_new_page(struct page *newpage, struct page *page, * We clear PG_movable under page_lock so any compactor * cannot try to migrate this page. */ - __ClearPageIsolated(page); + ClearPageIsolated(page); } /* @@ -917,8 +918,7 @@ static int move_to_new_page(struct page *newpage, struct page *page, page->mapping = NULL; if (likely(!is_zone_device_page(newpage))) - flush_dcache_page(newpage); - + flush_dcache_folio(page_folio(newpage)); } out: return rc; @@ -1092,7 +1092,7 @@ static int unmap_and_move(new_page_t get_new_page, if (unlikely(__PageMovable(page))) { lock_page(page); if (!PageMovable(page)) - __ClearPageIsolated(page); + ClearPageIsolated(page); unlock_page(page); } goto out; @@ -1351,7 +1351,6 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, bool is_thp = false; struct page *page; struct page *page2; - int swapwrite = current->flags & PF_SWAPWRITE; int rc, nr_subpages; LIST_HEAD(ret_pages); LIST_HEAD(thp_split_pages); @@ -1360,9 +1359,6 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, trace_mm_migrate_pages_start(mode, reason); - if (!swapwrite) - current->flags |= PF_SWAPWRITE; - thp_subpage_migration: for (pass = 0; pass < 10 && (retry || thp_retry); pass++) { retry = 0; @@ -1517,9 +1513,6 @@ out: trace_mm_migrate_pages(nr_succeeded, nr_failed_pages, nr_thp_succeeded, nr_thp_failed, nr_thp_split, mode, reason); - if (!swapwrite) - current->flags &= ~PF_SWAPWRITE; - if (ret_succeeded) *ret_succeeded = nr_succeeded; @@ -1612,7 +1605,6 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, { struct vm_area_struct *vma; struct page *page; - unsigned int follflags; int err; mmap_read_lock(mm); @@ -1622,8 +1614,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, goto out; /* FOLL_DUMP to ignore special (like zero) pages */ - follflags = FOLL_GET | FOLL_DUMP; - page = follow_page(vma, addr, follflags); + page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); err = PTR_ERR(page); if (IS_ERR(page)) @@ -1762,6 +1753,13 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, } /* + * The move_pages() man page does not have an -EEXIST choice, so + * use -EFAULT instead. + */ + if (err == -EEXIST) + err = -EFAULT; + + /* * If the page is already on the target node (!err), store the * node, otherwise, store the err. */ @@ -2034,16 +2032,27 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) { int page_lru; int nr_pages = thp_nr_pages(page); + int order = compound_order(page); - VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page); + VM_BUG_ON_PAGE(order && !PageTransHuge(page), page); /* Do not migrate THP mapped by multiple processes */ if (PageTransHuge(page) && total_mapcount(page) > 1) return 0; /* Avoid migrating to a node that is nearly full */ - if (!migrate_balanced_pgdat(pgdat, nr_pages)) + if (!migrate_balanced_pgdat(pgdat, nr_pages)) { + int z; + + if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)) + return 0; + for (z = pgdat->nr_zones - 1; z >= 0; z--) { + if (populated_zone(pgdat->node_zones + z)) + break; + } + wakeup_kswapd(pgdat->node_zones + z, 0, order, ZONE_MOVABLE); return 0; + } if (isolate_lru_page(page)) return 0; @@ -2072,6 +2081,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, pg_data_t *pgdat = NODE_DATA(node); int isolated; int nr_remaining; + unsigned int nr_succeeded; LIST_HEAD(migratepages); new_page_t *new; bool compound; @@ -2110,7 +2120,8 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, list_add(&page->lru, &migratepages); nr_remaining = migrate_pages(&migratepages, *new, NULL, node, - MIGRATE_ASYNC, MR_NUMA_MISPLACED, NULL); + MIGRATE_ASYNC, MR_NUMA_MISPLACED, + &nr_succeeded); if (nr_remaining) { if (!list_empty(&migratepages)) { list_del(&page->lru); @@ -2119,8 +2130,13 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, putback_lru_page(page); } isolated = 0; - } else - count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_pages); + } + if (nr_succeeded) { + count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_succeeded); + if (!node_is_toptier(page_to_nid(page)) && node_is_toptier(node)) + mod_node_page_state(pgdat, PGPROMOTE_SUCCESS, + nr_succeeded); + } BUG_ON(!list_empty(&migratepages)); return isolated; @@ -3082,18 +3098,21 @@ static int establish_migrate_target(int node, nodemask_t *used, if (best_distance != -1) { val = node_distance(node, migration_target); if (val > best_distance) - return NUMA_NO_NODE; + goto out_clear; } index = nd->nr; if (WARN_ONCE(index >= DEMOTION_TARGET_NODES, "Exceeds maximum demotion target nodes\n")) - return NUMA_NO_NODE; + goto out_clear; nd->nodes[index] = migration_target; nd->nr++; return migration_target; +out_clear: + node_clear(migration_target, *used); + return NUMA_NO_NODE; } /* @@ -3190,7 +3209,7 @@ again: /* * For callers that do not hold get_online_mems() already. */ -static void set_migration_target_nodes(void) +void set_migration_target_nodes(void) { get_online_mems(); __set_migration_target_nodes(); @@ -3254,51 +3273,24 @@ static int __meminit migrate_on_reclaim_callback(struct notifier_block *self, return notifier_from_errno(0); } -/* - * React to hotplug events that might affect the migration targets - * like events that online or offline NUMA nodes. - * - * The ordering is also currently dependent on which nodes have - * CPUs. That means we need CPU on/offline notification too. - */ -static int migration_online_cpu(unsigned int cpu) -{ - set_migration_target_nodes(); - return 0; -} - -static int migration_offline_cpu(unsigned int cpu) -{ - set_migration_target_nodes(); - return 0; -} - -static int __init migrate_on_reclaim_init(void) +void __init migrate_on_reclaim_init(void) { - int ret; - node_demotion = kmalloc_array(nr_node_ids, sizeof(struct demotion_nodes), GFP_KERNEL); WARN_ON(!node_demotion); - ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline", - NULL, migration_offline_cpu); + hotplug_memory_notifier(migrate_on_reclaim_callback, 100); /* - * In the unlikely case that this fails, the automatic - * migration targets may become suboptimal for nodes - * where N_CPU changes. With such a small impact in a - * rare case, do not bother trying to do anything special. + * At this point, all numa nodes with memory/CPus have their state + * properly set, so we can build the demotion order now. + * Let us hold the cpu_hotplug lock just, as we could possibily have + * CPU hotplug events during boot. */ - WARN_ON(ret < 0); - ret = cpuhp_setup_state(CPUHP_AP_MM_DEMOTION_ONLINE, "mm/demotion:online", - migration_online_cpu, NULL); - WARN_ON(ret < 0); - - hotplug_memory_notifier(migrate_on_reclaim_callback, 100); - return 0; + cpus_read_lock(); + set_migration_target_nodes(); + cpus_read_unlock(); } -late_initcall(migrate_on_reclaim_init); #endif /* CONFIG_HOTPLUG_CPU */ bool numa_demotion_enabled = false; |