summaryrefslogtreecommitdiffstats
path: root/mm/mempolicy.c
diff options
context:
space:
mode:
authorJann Horn <jannh@google.com>2023-07-28 06:13:21 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2023-07-28 18:44:06 +0200
commit6c21e066f9256ea1df6f88768f6ae1080b7cf509 (patch)
tree95326194f48628759a895e5d3dfcb766faf4b119 /mm/mempolicy.c
parentMerge tag 'net-6.5-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netd... (diff)
downloadlinux-6c21e066f9256ea1df6f88768f6ae1080b7cf509.tar.xz
linux-6c21e066f9256ea1df6f88768f6ae1080b7cf509.zip
mm/mempolicy: Take VMA lock before replacing policy
mbind() calls down into vma_replace_policy() without taking the per-VMA locks, replaces the VMA's vma->vm_policy pointer, and frees the old policy. That's bad; a concurrent page fault might still be using the old policy (in vma_alloc_folio()), resulting in use-after-free. Normally this will manifest as a use-after-free read first, but it can result in memory corruption, including because vma_alloc_folio() can call mpol_cond_put() on the freed policy, which conditionally changes the policy's refcount member. This bug is specific to CONFIG_NUMA, but it does also affect non-NUMA systems as long as the kernel was built with CONFIG_NUMA. Signed-off-by: Jann Horn <jannh@google.com> Reviewed-by: Suren Baghdasaryan <surenb@google.com> Fixes: 5e31275cc997 ("mm: add per-VMA lock and helper functions to control it") Cc: stable@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to '')
-rw-r--r--mm/mempolicy.c15
1 files changed, 14 insertions, 1 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index edc25195f5bd..c53f8beeb507 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -384,8 +384,10 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
VMA_ITERATOR(vmi, mm, 0);
mmap_write_lock(mm);
- for_each_vma(vmi, vma)
+ for_each_vma(vmi, vma) {
+ vma_start_write(vma);
mpol_rebind_policy(vma->vm_policy, new);
+ }
mmap_write_unlock(mm);
}
@@ -768,6 +770,8 @@ static int vma_replace_policy(struct vm_area_struct *vma,
struct mempolicy *old;
struct mempolicy *new;
+ vma_assert_write_locked(vma);
+
pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
vma->vm_start, vma->vm_end, vma->vm_pgoff,
vma->vm_ops, vma->vm_file,
@@ -1313,6 +1317,14 @@ static long do_mbind(unsigned long start, unsigned long len,
if (err)
goto mpol_out;
+ /*
+ * Lock the VMAs before scanning for pages to migrate, to ensure we don't
+ * miss a concurrently inserted page.
+ */
+ vma_iter_init(&vmi, mm, start);
+ for_each_vma_range(vmi, vma, end)
+ vma_start_write(vma);
+
ret = queue_pages_range(mm, start, end, nmask,
flags | MPOL_MF_INVERT, &pagelist);
@@ -1538,6 +1550,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
break;
}
+ vma_start_write(vma);
new->home_node = home_node;
err = mbind_range(&vmi, vma, &prev, start, end, new);
mpol_put(new);