diff options
author | Dave Airlie <airlied@redhat.com> | 2015-04-20 03:32:26 +0200 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2015-04-20 05:05:20 +0200 |
commit | 2c33ce009ca2389dbf0535d0672214d09738e35e (patch) | |
tree | 6186a6458c3c160385d794a23eaf07c786a9e61b /fs/aio.c | |
parent | media-bus: Fixup RGB444_1X12, RGB565_1X16, and YUV8_1X24 media bus format (diff) | |
parent | Merge branch 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/len... (diff) | |
download | linux-2c33ce009ca2389dbf0535d0672214d09738e35e.tar.xz linux-2c33ce009ca2389dbf0535d0672214d09738e35e.zip |
Merge Linus master into drm-next
The merge is clean, but the arm build fails afterwards,
due to API changes in the regulator tree.
I've included the patch into the merge to fix the build.
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'fs/aio.c')
-rw-r--r-- | fs/aio.c | 278 |
1 files changed, 134 insertions, 144 deletions
@@ -77,6 +77,11 @@ struct kioctx_cpu { unsigned reqs_available; }; +struct ctx_rq_wait { + struct completion comp; + atomic_t count; +}; + struct kioctx { struct percpu_ref users; atomic_t dead; @@ -115,7 +120,7 @@ struct kioctx { /* * signals when all in-flight requests are done */ - struct completion *requests_done; + struct ctx_rq_wait *rq_wait; struct { /* @@ -151,6 +156,38 @@ struct kioctx { unsigned id; }; +/* + * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either + * cancelled or completed (this makes a certain amount of sense because + * successful cancellation - io_cancel() - does deliver the completion to + * userspace). + * + * And since most things don't implement kiocb cancellation and we'd really like + * kiocb completion to be lockless when possible, we use ki_cancel to + * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED + * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel(). + */ +#define KIOCB_CANCELLED ((void *) (~0ULL)) + +struct aio_kiocb { + struct kiocb common; + + struct kioctx *ki_ctx; + kiocb_cancel_fn *ki_cancel; + + struct iocb __user *ki_user_iocb; /* user's aiocb */ + __u64 ki_user_data; /* user's data for completion */ + + struct list_head ki_list; /* the aio core uses this + * for cancellation */ + + /* + * If the aio_resfd field of the userspace iocb is not zero, + * this is the underlying eventfd context to deliver events to. + */ + struct eventfd_ctx *ki_eventfd; +}; + /*------ sysctl variables----*/ static DEFINE_SPINLOCK(aio_nr_lock); unsigned long aio_nr; /* current system wide number of aio requests */ @@ -220,7 +257,7 @@ static int __init aio_setup(void) if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); - kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); + kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page)); @@ -278,11 +315,11 @@ static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static void aio_ring_remap(struct file *file, struct vm_area_struct *vma) +static int aio_ring_remap(struct file *file, struct vm_area_struct *vma) { struct mm_struct *mm = vma->vm_mm; struct kioctx_table *table; - int i; + int i, res = -EINVAL; spin_lock(&mm->ioctx_lock); rcu_read_lock(); @@ -292,13 +329,17 @@ static void aio_ring_remap(struct file *file, struct vm_area_struct *vma) ctx = table->table[i]; if (ctx && ctx->aio_ring_file == file) { - ctx->user_id = ctx->mmap_base = vma->vm_start; + if (!atomic_read(&ctx->dead)) { + ctx->user_id = ctx->mmap_base = vma->vm_start; + res = 0; + } break; } } rcu_read_unlock(); spin_unlock(&mm->ioctx_lock); + return res; } static const struct file_operations aio_ring_fops = { @@ -480,8 +521,9 @@ static int aio_setup_ring(struct kioctx *ctx) #define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) #define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) -void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) +void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) { + struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common); struct kioctx *ctx = req->ki_ctx; unsigned long flags; @@ -496,7 +538,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) } EXPORT_SYMBOL(kiocb_set_cancel_fn); -static int kiocb_cancel(struct kiocb *kiocb) +static int kiocb_cancel(struct aio_kiocb *kiocb) { kiocb_cancel_fn *old, *cancel; @@ -514,7 +556,7 @@ static int kiocb_cancel(struct kiocb *kiocb) cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED); } while (cancel != old); - return cancel(kiocb); + return cancel(&kiocb->common); } static void free_ioctx(struct work_struct *work) @@ -535,8 +577,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref) struct kioctx *ctx = container_of(ref, struct kioctx, reqs); /* At this point we know that there are no any in-flight requests */ - if (ctx->requests_done) - complete(ctx->requests_done); + if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) + complete(&ctx->rq_wait->comp); INIT_WORK(&ctx->free_work, free_ioctx); schedule_work(&ctx->free_work); @@ -550,13 +592,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref) static void free_ioctx_users(struct percpu_ref *ref) { struct kioctx *ctx = container_of(ref, struct kioctx, users); - struct kiocb *req; + struct aio_kiocb *req; spin_lock_irq(&ctx->ctx_lock); while (!list_empty(&ctx->active_reqs)) { req = list_first_entry(&ctx->active_reqs, - struct kiocb, ki_list); + struct aio_kiocb, ki_list); list_del_init(&req->ki_list); kiocb_cancel(req); @@ -655,8 +697,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) nr_events *= 2; /* Prevent overflows */ - if ((nr_events > (0x10000000U / sizeof(struct io_event))) || - (nr_events > (0x10000000U / sizeof(struct kiocb)))) { + if (nr_events > (0x10000000U / sizeof(struct io_event))) { pr_debug("ENOMEM: nr_events too high\n"); return ERR_PTR(-EINVAL); } @@ -727,6 +768,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) err_cleanup: aio_nr_sub(ctx->max_reqs); err_ctx: + atomic_set(&ctx->dead, 1); + if (ctx->mmap_size) + vm_munmap(ctx->mmap_base, ctx->mmap_size); aio_free_ring(ctx); err: mutex_unlock(&ctx->ring_lock); @@ -744,15 +788,16 @@ err: * the rapid destruction of the kioctx. */ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, - struct completion *requests_done) + struct ctx_rq_wait *wait) { struct kioctx_table *table; - if (atomic_xchg(&ctx->dead, 1)) + spin_lock(&mm->ioctx_lock); + if (atomic_xchg(&ctx->dead, 1)) { + spin_unlock(&mm->ioctx_lock); return -EINVAL; + } - - spin_lock(&mm->ioctx_lock); table = rcu_dereference_raw(mm->ioctx_table); WARN_ON(ctx != table->table[ctx->id]); table->table[ctx->id] = NULL; @@ -773,27 +818,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, if (ctx->mmap_size) vm_munmap(ctx->mmap_base, ctx->mmap_size); - ctx->requests_done = requests_done; + ctx->rq_wait = wait; percpu_ref_kill(&ctx->users); return 0; } -/* wait_on_sync_kiocb: - * Waits on the given sync kiocb to complete. - */ -ssize_t wait_on_sync_kiocb(struct kiocb *req) -{ - while (!req->ki_ctx) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (req->ki_ctx) - break; - io_schedule(); - } - __set_current_state(TASK_RUNNING); - return req->ki_user_data; -} -EXPORT_SYMBOL(wait_on_sync_kiocb); - /* * exit_aio: called when the last user of mm goes away. At this point, there is * no way for any new requests to be submited or any of the io_* syscalls to be @@ -805,18 +834,24 @@ EXPORT_SYMBOL(wait_on_sync_kiocb); void exit_aio(struct mm_struct *mm) { struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table); - int i; + struct ctx_rq_wait wait; + int i, skipped; if (!table) return; + atomic_set(&wait.count, table->nr); + init_completion(&wait.comp); + + skipped = 0; for (i = 0; i < table->nr; ++i) { struct kioctx *ctx = table->table[i]; - struct completion requests_done = - COMPLETION_INITIALIZER_ONSTACK(requests_done); - if (!ctx) + if (!ctx) { + skipped++; continue; + } + /* * We don't need to bother with munmap() here - exit_mmap(mm) * is coming and it'll unmap everything. And we simply can't, @@ -825,10 +860,12 @@ void exit_aio(struct mm_struct *mm) * that it needs to unmap the area, just set it to 0. */ ctx->mmap_size = 0; - kill_ioctx(mm, ctx, &requests_done); + kill_ioctx(mm, ctx, &wait); + } + if (!atomic_sub_and_test(skipped, &wait.count)) { /* Wait until all IO for the context are done. */ - wait_for_completion(&requests_done); + wait_for_completion(&wait.comp); } RCU_INIT_POINTER(mm->ioctx_table, NULL); @@ -948,9 +985,9 @@ static void user_refill_reqs_available(struct kioctx *ctx) * Allocate a slot for an aio request. * Returns NULL if no requests are free. */ -static inline struct kiocb *aio_get_req(struct kioctx *ctx) +static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) { - struct kiocb *req; + struct aio_kiocb *req; if (!get_reqs_available(ctx)) { user_refill_reqs_available(ctx); @@ -971,10 +1008,10 @@ out_put: return NULL; } -static void kiocb_free(struct kiocb *req) +static void kiocb_free(struct aio_kiocb *req) { - if (req->ki_filp) - fput(req->ki_filp); + if (req->common.ki_filp) + fput(req->common.ki_filp); if (req->ki_eventfd != NULL) eventfd_ctx_put(req->ki_eventfd); kmem_cache_free(kiocb_cachep, req); @@ -1010,8 +1047,9 @@ out: /* aio_complete * Called when the io request on the given iocb is complete. */ -void aio_complete(struct kiocb *iocb, long res, long res2) +static void aio_complete(struct kiocb *kiocb, long res, long res2) { + struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common); struct kioctx *ctx = iocb->ki_ctx; struct aio_ring *ring; struct io_event *ev_page, *event; @@ -1025,13 +1063,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) * ref, no other paths have a way to get another ref * - the sync task helpfully left a reference to itself in the iocb */ - if (is_sync_kiocb(iocb)) { - iocb->ki_user_data = res; - smp_wmb(); - iocb->ki_ctx = ERR_PTR(-EXDEV); - wake_up_process(iocb->ki_obj.tsk); - return; - } + BUG_ON(is_sync_kiocb(kiocb)); if (iocb->ki_list.next) { unsigned long flags; @@ -1057,7 +1089,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); event = ev_page + pos % AIO_EVENTS_PER_PAGE; - event->obj = (u64)(unsigned long)iocb->ki_obj.user; + event->obj = (u64)(unsigned long)iocb->ki_user_iocb; event->data = iocb->ki_user_data; event->res = res; event->res2 = res2; @@ -1066,7 +1098,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", - ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data, + ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data, res, res2); /* after flagging the request as done, we @@ -1113,7 +1145,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2) percpu_ref_put(&ctx->reqs); } -EXPORT_SYMBOL(aio_complete); /* aio_read_events_ring * Pull an event off of the ioctx's event ring. Returns the number of @@ -1313,15 +1344,17 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) { struct kioctx *ioctx = lookup_ioctx(ctx); if (likely(NULL != ioctx)) { - struct completion requests_done = - COMPLETION_INITIALIZER_ONSTACK(requests_done); + struct ctx_rq_wait wait; int ret; + init_completion(&wait.comp); + atomic_set(&wait.count, 1); + /* Pass requests_done to kill_ioctx() where it can be set * in a thread-safe way. If we try to set it here then we have * a race condition if two io_destroy() called simultaneously. */ - ret = kill_ioctx(current->mm, ioctx, &requests_done); + ret = kill_ioctx(current->mm, ioctx, &wait); percpu_ref_put(&ioctx->users); /* Wait until all IO for the context are done. Otherwise kernel @@ -1329,7 +1362,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) * is destroyed. */ if (!ret) - wait_for_completion(&requests_done); + wait_for_completion(&wait.comp); return ret; } @@ -1337,50 +1370,21 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) return -EINVAL; } -typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, - unsigned long, loff_t); typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); -static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, - int rw, char __user *buf, - unsigned long *nr_segs, - struct iovec **iovec, - bool compat) +static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len, + struct iovec **iovec, + bool compat, + struct iov_iter *iter) { - ssize_t ret; - - *nr_segs = kiocb->ki_nbytes; - #ifdef CONFIG_COMPAT if (compat) - ret = compat_rw_copy_check_uvector(rw, + return compat_import_iovec(rw, (struct compat_iovec __user *)buf, - *nr_segs, UIO_FASTIOV, *iovec, iovec); - else + len, UIO_FASTIOV, iovec, iter); #endif - ret = rw_copy_check_uvector(rw, - (struct iovec __user *)buf, - *nr_segs, UIO_FASTIOV, *iovec, iovec); - if (ret < 0) - return ret; - - /* ki_nbytes now reflect bytes instead of segs */ - kiocb->ki_nbytes = ret; - return 0; -} - -static ssize_t aio_setup_single_vector(struct kiocb *kiocb, - int rw, char __user *buf, - unsigned long *nr_segs, - struct iovec *iovec) -{ - if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes))) - return -EFAULT; - - iovec->iov_base = buf; - iovec->iov_len = kiocb->ki_nbytes; - *nr_segs = 1; - return 0; + return import_iovec(rw, (struct iovec __user *)buf, + len, UIO_FASTIOV, iovec, iter); } /* @@ -1388,14 +1392,12 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb, * Performs the initial checks and io submission. */ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, - char __user *buf, bool compat) + char __user *buf, size_t len, bool compat) { struct file *file = req->ki_filp; ssize_t ret; - unsigned long nr_segs; int rw; fmode_t mode; - aio_rw_op *rw_op; rw_iter_op *iter_op; struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iov_iter iter; @@ -1405,7 +1407,6 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, case IOCB_CMD_PREADV: mode = FMODE_READ; rw = READ; - rw_op = file->f_op->aio_read; iter_op = file->f_op->read_iter; goto rw_common; @@ -1413,51 +1414,40 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, case IOCB_CMD_PWRITEV: mode = FMODE_WRITE; rw = WRITE; - rw_op = file->f_op->aio_write; iter_op = file->f_op->write_iter; goto rw_common; rw_common: if (unlikely(!(file->f_mode & mode))) return -EBADF; - if (!rw_op && !iter_op) + if (!iter_op) return -EINVAL; - ret = (opcode == IOCB_CMD_PREADV || - opcode == IOCB_CMD_PWRITEV) - ? aio_setup_vectored_rw(req, rw, buf, &nr_segs, - &iovec, compat) - : aio_setup_single_vector(req, rw, buf, &nr_segs, - iovec); + if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) + ret = aio_setup_vectored_rw(rw, buf, len, + &iovec, compat, &iter); + else { + ret = import_single_range(rw, buf, len, iovec, &iter); + iovec = NULL; + } if (!ret) - ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); + ret = rw_verify_area(rw, file, &req->ki_pos, + iov_iter_count(&iter)); if (ret < 0) { - if (iovec != inline_vecs) - kfree(iovec); + kfree(iovec); return ret; } - req->ki_nbytes = ret; - - /* XXX: move/kill - rw_verify_area()? */ - /* This matches the pread()/pwrite() logic */ - if (req->ki_pos < 0) { - ret = -EINVAL; - break; - } + len = ret; if (rw == WRITE) file_start_write(file); - if (iter_op) { - iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes); - ret = iter_op(req, &iter); - } else { - ret = rw_op(req, iovec, nr_segs, req->ki_pos); - } + ret = iter_op(req, &iter); if (rw == WRITE) file_end_write(file); + kfree(iovec); break; case IOCB_CMD_FDSYNC: @@ -1479,9 +1469,6 @@ rw_common: return -EINVAL; } - if (iovec != inline_vecs) - kfree(iovec); - if (ret != -EIOCBQUEUED) { /* * There's no easy way to restart the syscall since other AIO's @@ -1500,7 +1487,7 @@ rw_common: static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, struct iocb *iocb, bool compat) { - struct kiocb *req; + struct aio_kiocb *req; ssize_t ret; /* enforce forwards compatibility on users */ @@ -1523,11 +1510,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, if (unlikely(!req)) return -EAGAIN; - req->ki_filp = fget(iocb->aio_fildes); - if (unlikely(!req->ki_filp)) { + req->common.ki_filp = fget(iocb->aio_fildes); + if (unlikely(!req->common.ki_filp)) { ret = -EBADF; goto out_put_req; } + req->common.ki_pos = iocb->aio_offset; + req->common.ki_complete = aio_complete; + req->common.ki_flags = iocb_flags(req->common.ki_filp); if (iocb->aio_flags & IOCB_FLAG_RESFD) { /* @@ -1542,6 +1532,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, req->ki_eventfd = NULL; goto out_put_req; } + + req->common.ki_flags |= IOCB_EVENTFD; } ret = put_user(KIOCB_KEY, &user_iocb->aio_key); @@ -1550,13 +1542,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, goto out_put_req; } - req->ki_obj.user = user_iocb; + req->ki_user_iocb = user_iocb; req->ki_user_data = iocb->aio_data; - req->ki_pos = iocb->aio_offset; - req->ki_nbytes = iocb->aio_nbytes; - ret = aio_run_iocb(req, iocb->aio_lio_opcode, + ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode, (char __user *)(unsigned long)iocb->aio_buf, + iocb->aio_nbytes, compat); if (ret) goto out_put_req; @@ -1643,10 +1634,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, /* lookup_kiocb * Finds a given iocb for cancellation. */ -static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, - u32 key) +static struct aio_kiocb * +lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key) { - struct list_head *pos; + struct aio_kiocb *kiocb; assert_spin_locked(&ctx->ctx_lock); @@ -1654,9 +1645,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, return NULL; /* TODO: use a hash or array, this sucks. */ - list_for_each(pos, &ctx->active_reqs) { - struct kiocb *kiocb = list_kiocb(pos); - if (kiocb->ki_obj.user == iocb) + list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { + if (kiocb->ki_user_iocb == iocb) return kiocb; } return NULL; @@ -1676,7 +1666,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result) { struct kioctx *ctx; - struct kiocb *kiocb; + struct aio_kiocb *kiocb; u32 key; int ret; |