summaryrefslogtreecommitdiffstats
path: root/fs/userfaultfd.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/userfaultfd.c')
-rw-r--r--fs/userfaultfd.c59
1 files changed, 47 insertions, 12 deletions
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 85959d8324df..43953e03c356 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -63,6 +63,7 @@ struct userfaultfd_wait_queue {
struct uffd_msg msg;
wait_queue_t wq;
struct userfaultfd_ctx *ctx;
+ bool waken;
};
struct userfaultfd_wake_range {
@@ -86,6 +87,12 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
if (len && (start > uwq->msg.arg.pagefault.address ||
start + len <= uwq->msg.arg.pagefault.address))
goto out;
+ WRITE_ONCE(uwq->waken, true);
+ /*
+ * The implicit smp_mb__before_spinlock in try_to_wake_up()
+ * renders uwq->waken visible to other CPUs before the task is
+ * waken.
+ */
ret = wake_up_state(wq->private, mode);
if (ret)
/*
@@ -257,18 +264,19 @@ out:
* fatal_signal_pending()s, and the mmap_sem must be released before
* returning it.
*/
-int handle_userfault(struct fault_env *fe, unsigned long reason)
+int handle_userfault(struct vm_fault *vmf, unsigned long reason)
{
- struct mm_struct *mm = fe->vma->vm_mm;
+ struct mm_struct *mm = vmf->vma->vm_mm;
struct userfaultfd_ctx *ctx;
struct userfaultfd_wait_queue uwq;
int ret;
bool must_wait, return_to_userland;
+ long blocking_state;
BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
ret = VM_FAULT_SIGBUS;
- ctx = fe->vma->vm_userfaultfd_ctx.ctx;
+ ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
if (!ctx)
goto out;
@@ -301,17 +309,18 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
* without first stopping userland access to the memory. For
* VM_UFFD_MISSING userfaults this is enough for now.
*/
- if (unlikely(!(fe->flags & FAULT_FLAG_ALLOW_RETRY))) {
+ if (unlikely(!(vmf->flags & FAULT_FLAG_ALLOW_RETRY))) {
/*
* Validate the invariant that nowait must allow retry
* to be sure not to return SIGBUS erroneously on
* nowait invocations.
*/
- BUG_ON(fe->flags & FAULT_FLAG_RETRY_NOWAIT);
+ BUG_ON(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
#ifdef CONFIG_DEBUG_VM
if (printk_ratelimit()) {
printk(KERN_WARNING
- "FAULT_FLAG_ALLOW_RETRY missing %x\n", fe->flags);
+ "FAULT_FLAG_ALLOW_RETRY missing %x\n",
+ vmf->flags);
dump_stack();
}
#endif
@@ -323,7 +332,7 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
* and wait.
*/
ret = VM_FAULT_RETRY;
- if (fe->flags & FAULT_FLAG_RETRY_NOWAIT)
+ if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
goto out;
/* take the reference before dropping the mmap_sem */
@@ -331,12 +340,15 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
uwq.wq.private = current;
- uwq.msg = userfault_msg(fe->address, fe->flags, reason);
+ uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
uwq.ctx = ctx;
+ uwq.waken = false;
return_to_userland =
- (fe->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
+ (vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
+ blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
+ TASK_KILLABLE;
spin_lock(&ctx->fault_pending_wqh.lock);
/*
@@ -349,11 +361,11 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
* following the spin_unlock to happen before the list_add in
* __add_wait_queue.
*/
- set_current_state(return_to_userland ? TASK_INTERRUPTIBLE :
- TASK_KILLABLE);
+ set_current_state(blocking_state);
spin_unlock(&ctx->fault_pending_wqh.lock);
- must_wait = userfaultfd_must_wait(ctx, fe->address, fe->flags, reason);
+ must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
+ reason);
up_read(&mm->mmap_sem);
if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&
@@ -362,6 +374,29 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
wake_up_poll(&ctx->fd_wqh, POLLIN);
schedule();
ret |= VM_FAULT_MAJOR;
+
+ /*
+ * False wakeups can orginate even from rwsem before
+ * up_read() however userfaults will wait either for a
+ * targeted wakeup on the specific uwq waitqueue from
+ * wake_userfault() or for signals or for uffd
+ * release.
+ */
+ while (!READ_ONCE(uwq.waken)) {
+ /*
+ * This needs the full smp_store_mb()
+ * guarantee as the state write must be
+ * visible to other CPUs before reading
+ * uwq.waken from other CPUs.
+ */
+ set_current_state(blocking_state);
+ if (READ_ONCE(uwq.waken) ||
+ READ_ONCE(ctx->released) ||
+ (return_to_userland ? signal_pending(current) :
+ fatal_signal_pending(current)))
+ break;
+ schedule();
+ }
}
__set_current_state(TASK_RUNNING);