diff options
Diffstat (limited to 'fs/userfaultfd.c')
-rw-r--r-- | fs/userfaultfd.c | 78 |
1 files changed, 28 insertions, 50 deletions
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 3c421d06a18e..1d227b0fcf49 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -14,7 +14,8 @@ #include <linux/list.h> #include <linux/hashtable.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/mm.h> #include <linux/mm.h> #include <linux/poll.h> #include <linux/slab.h> @@ -137,8 +138,6 @@ out: * userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd * context. * @ctx: [in] Pointer to the userfaultfd context. - * - * Returns: In case of success, returns not zero. */ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx) { @@ -266,6 +265,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, { struct mm_struct *mm = ctx->mm; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd, _pmd; pte_t *pte; @@ -276,7 +276,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) goto out; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + goto out; + pud = pud_offset(p4d, address); if (!pud_present(*pud)) goto out; pmd = pmd_offset(pud, address); @@ -489,7 +492,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) * in such case. */ down_read(&mm->mmap_sem); - ret = 0; + ret = VM_FAULT_NOPAGE; } } @@ -526,10 +529,11 @@ out: return ret; } -static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, - struct userfaultfd_wait_queue *ewq) +static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, + struct userfaultfd_wait_queue *ewq) { - int ret = 0; + if (WARN_ON_ONCE(current->flags & PF_EXITING)) + goto out; ewq->ctx = ctx; init_waitqueue_entry(&ewq->wq, current); @@ -546,8 +550,16 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, break; if (ACCESS_ONCE(ctx->released) || fatal_signal_pending(current)) { - ret = -1; __remove_wait_queue(&ctx->event_wqh, &ewq->wq); + if (ewq->msg.event == UFFD_EVENT_FORK) { + struct userfaultfd_ctx *new; + + new = (struct userfaultfd_ctx *) + (unsigned long) + ewq->msg.arg.reserved.reserved1; + + userfaultfd_ctx_put(new); + } break; } @@ -565,9 +577,8 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, * ctx may go away after this if the userfault pseudo fd is * already released. */ - +out: userfaultfd_ctx_put(ctx); - return ret; } static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx, @@ -625,7 +636,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) return 0; } -static int dup_fctx(struct userfaultfd_fork_ctx *fctx) +static void dup_fctx(struct userfaultfd_fork_ctx *fctx) { struct userfaultfd_ctx *ctx = fctx->orig; struct userfaultfd_wait_queue ewq; @@ -635,17 +646,15 @@ static int dup_fctx(struct userfaultfd_fork_ctx *fctx) ewq.msg.event = UFFD_EVENT_FORK; ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new; - return userfaultfd_event_wait_completion(ctx, &ewq); + userfaultfd_event_wait_completion(ctx, &ewq); } void dup_userfaultfd_complete(struct list_head *fcs) { - int ret = 0; struct userfaultfd_fork_ctx *fctx, *n; list_for_each_entry_safe(fctx, n, fcs, list) { - if (!ret) - ret = dup_fctx(fctx); + dup_fctx(fctx); list_del(&fctx->list); kfree(fctx); } @@ -688,8 +697,7 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx, userfaultfd_event_wait_completion(ctx, &ewq); } -void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct mm_struct *mm = vma->vm_mm; @@ -698,13 +706,11 @@ void userfaultfd_remove(struct vm_area_struct *vma, ctx = vma->vm_userfaultfd_ctx.ctx; if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE)) - return; + return true; userfaultfd_ctx_get(ctx); up_read(&mm->mmap_sem); - *prev = NULL; /* We wait for ACK w/o the mmap semaphore */ - msg_init(&ewq.msg); ewq.msg.event = UFFD_EVENT_REMOVE; @@ -713,7 +719,7 @@ void userfaultfd_remove(struct vm_area_struct *vma, userfaultfd_event_wait_completion(ctx, &ewq); - down_read(&mm->mmap_sem); + return false; } static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps, @@ -774,34 +780,6 @@ void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf) } } -void userfaultfd_exit(struct mm_struct *mm) -{ - struct vm_area_struct *vma = mm->mmap; - - /* - * We can do the vma walk without locking because the caller - * (exit_mm) knows it now has exclusive access - */ - while (vma) { - struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; - - if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) { - struct userfaultfd_wait_queue ewq; - - userfaultfd_ctx_get(ctx); - - msg_init(&ewq.msg); - ewq.msg.event = UFFD_EVENT_EXIT; - - userfaultfd_event_wait_completion(ctx, &ewq); - - ctx->features &= ~UFFD_FEATURE_EVENT_EXIT; - } - - vma = vma->vm_next; - } -} - static int userfaultfd_release(struct inode *inode, struct file *file) { struct userfaultfd_ctx *ctx = file->private_data; |