diff options
Diffstat (limited to 'fs/userfaultfd.c')
-rw-r--r-- | fs/userfaultfd.c | 66 |
1 files changed, 44 insertions, 22 deletions
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index b0d5897bc4e6..ef4b48d1ea42 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -109,27 +109,24 @@ static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode, goto out; WRITE_ONCE(uwq->waken, true); /* - * The implicit smp_mb__before_spinlock in try_to_wake_up() - * renders uwq->waken visible to other CPUs before the task is - * waken. + * The Program-Order guarantees provided by the scheduler + * ensure uwq->waken is visible before the task is woken. */ ret = wake_up_state(wq->private, mode); - if (ret) + if (ret) { /* * Wake only once, autoremove behavior. * - * After the effect of list_del_init is visible to the - * other CPUs, the waitqueue may disappear from under - * us, see the !list_empty_careful() in - * handle_userfault(). try_to_wake_up() has an - * implicit smp_mb__before_spinlock, and the - * wq->private is read before calling the extern - * function "wake_up_state" (which in turns calls - * try_to_wake_up). While the spin_lock;spin_unlock; - * wouldn't be enough, the smp_mb__before_spinlock is - * enough to avoid an explicit smp_mb() here. + * After the effect of list_del_init is visible to the other + * CPUs, the waitqueue may disappear from under us, see the + * !list_empty_careful() in handle_userfault(). + * + * try_to_wake_up() has an implicit smp_mb(), and the + * wq->private is read before calling the extern function + * "wake_up_state" (which in turns calls try_to_wake_up). */ list_del_init(&wq->entry); + } out: return ret; } @@ -181,7 +178,8 @@ static inline void msg_init(struct uffd_msg *msg) static inline struct uffd_msg userfault_msg(unsigned long address, unsigned int flags, - unsigned long reason) + unsigned long reason, + unsigned int features) { struct uffd_msg msg; msg_init(&msg); @@ -205,6 +203,8 @@ static inline struct uffd_msg userfault_msg(unsigned long address, * write protect fault. */ msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WP; + if (features & UFFD_FEATURE_THREAD_ID) + msg.arg.pagefault.feat.ptid = task_pid_vnr(current); return msg; } @@ -373,13 +373,34 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) VM_BUG_ON(reason & ~(VM_UFFD_MISSING|VM_UFFD_WP)); VM_BUG_ON(!(reason & VM_UFFD_MISSING) ^ !!(reason & VM_UFFD_WP)); + if (ctx->features & UFFD_FEATURE_SIGBUS) + goto out; + /* * If it's already released don't get it. This avoids to loop * in __get_user_pages if userfaultfd_release waits on the * caller of handle_userfault to release the mmap_sem. */ - if (unlikely(ACCESS_ONCE(ctx->released))) + if (unlikely(ACCESS_ONCE(ctx->released))) { + /* + * Don't return VM_FAULT_SIGBUS in this case, so a non + * cooperative manager can close the uffd after the + * last UFFDIO_COPY, without risking to trigger an + * involuntary SIGBUS if the process was starting the + * userfaultfd while the userfaultfd was still armed + * (but after the last UFFDIO_COPY). If the uffd + * wasn't already closed when the userfault reached + * this point, that would normally be solved by + * userfaultfd_must_wait returning 'false'. + * + * If we were to return VM_FAULT_SIGBUS here, the non + * cooperative manager would be instead forced to + * always call UFFDIO_UNREGISTER before it can safely + * close the uffd. + */ + ret = VM_FAULT_NOPAGE; goto out; + } /* * Check that we can return VM_FAULT_RETRY. @@ -422,7 +443,8 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function); uwq.wq.private = current; - uwq.msg = userfault_msg(vmf->address, vmf->flags, reason); + uwq.msg = userfault_msg(vmf->address, vmf->flags, reason, + ctx->features); uwq.ctx = ctx; uwq.waken = false; @@ -1197,7 +1219,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, struct uffdio_register __user *user_uffdio_register; unsigned long vm_flags, new_flags; bool found; - bool non_anon_pages; + bool basic_ioctls; unsigned long start, end, vma_end; user_uffdio_register = (struct uffdio_register __user *) arg; @@ -1263,7 +1285,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, * Search for not compatible vmas. */ found = false; - non_anon_pages = false; + basic_ioctls = false; for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) { cond_resched(); @@ -1302,8 +1324,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, /* * Note vmas containing huge pages */ - if (is_vm_hugetlb_page(cur) || vma_is_shmem(cur)) - non_anon_pages = true; + if (is_vm_hugetlb_page(cur)) + basic_ioctls = true; found = true; } @@ -1374,7 +1396,7 @@ out_unlock: * userland which ioctls methods are guaranteed to * succeed on this range. */ - if (put_user(non_anon_pages ? UFFD_API_RANGE_IOCTLS_BASIC : + if (put_user(basic_ioctls ? UFFD_API_RANGE_IOCTLS_BASIC : UFFD_API_RANGE_IOCTLS, &user_uffdio_register->ioctls)) ret = -EFAULT; |