diff options
Diffstat (limited to 'fs/aio.c')
-rw-r--r-- | fs/aio.c | 94 |
1 files changed, 58 insertions, 36 deletions
@@ -15,6 +15,7 @@ #include <linux/aio_abi.h> #include <linux/module.h> #include <linux/syscalls.h> +#include <linux/backing-dev.h> #include <linux/uio.h> #define DEBUG 0 @@ -32,6 +33,9 @@ #include <linux/workqueue.h> #include <linux/security.h> #include <linux/eventfd.h> +#include <linux/blkdev.h> +#include <linux/mempool.h> +#include <linux/hash.h> #include <asm/kmap_types.h> #include <asm/uaccess.h> @@ -60,6 +64,14 @@ static DECLARE_WORK(fput_work, aio_fput_routine); static DEFINE_SPINLOCK(fput_lock); static LIST_HEAD(fput_head); +#define AIO_BATCH_HASH_BITS 3 /* allocated on-stack, so don't go crazy */ +#define AIO_BATCH_HASH_SIZE (1 << AIO_BATCH_HASH_BITS) +struct aio_batch_entry { + struct hlist_node list; + struct address_space *mapping; +}; +mempool_t *abe_pool; + static void aio_kick_handler(struct work_struct *); static void aio_queue_work(struct kioctx *); @@ -73,6 +85,8 @@ static int __init aio_setup(void) kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); aio_wq = create_workqueue("aio"); + abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); + BUG_ON(!abe_pool); pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page)); @@ -697,10 +711,8 @@ static ssize_t aio_run_iocb(struct kiocb *iocb) */ ret = retry(iocb); - if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { - BUG_ON(!list_empty(&iocb->ki_wait.task_list)); + if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) aio_complete(iocb, ret, 0); - } out: spin_lock_irq(&ctx->ctx_lock); @@ -852,13 +864,6 @@ static void try_queue_kicked_iocb(struct kiocb *iocb) unsigned long flags; int run = 0; - /* We're supposed to be the only path putting the iocb back on the run - * list. If we find that the iocb is *back* on a wait queue already - * than retry has happened before we could queue the iocb. This also - * means that the retry could have completed and freed our iocb, no - * good. */ - BUG_ON((!list_empty(&iocb->ki_wait.task_list))); - spin_lock_irqsave(&ctx->ctx_lock, flags); /* set this inside the lock so that we can't race with aio_run_iocb() * testing it and putting the iocb on the run list under the lock */ @@ -872,7 +877,7 @@ static void try_queue_kicked_iocb(struct kiocb *iocb) /* * kick_iocb: * Called typically from a wait queue callback context - * (aio_wake_function) to trigger a retry of the iocb. + * to trigger a retry of the iocb. * The retry is usually executed by aio workqueue * threads (See aio_kick_handler). */ @@ -1506,33 +1511,44 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb) return 0; } -/* - * aio_wake_function: - * wait queue callback function for aio notification, - * Simply triggers a retry of the operation via kick_iocb. - * - * This callback is specified in the wait queue entry in - * a kiocb. - * - * Note: - * This routine is executed with the wait queue lock held. - * Since kick_iocb acquires iocb->ctx->ctx_lock, it nests - * the ioctx lock inside the wait queue lock. This is safe - * because this callback isn't used for wait queues which - * are nested inside ioctx lock (i.e. ctx->wait) - */ -static int aio_wake_function(wait_queue_t *wait, unsigned mode, - int sync, void *key) +static void aio_batch_add(struct address_space *mapping, + struct hlist_head *batch_hash) +{ + struct aio_batch_entry *abe; + struct hlist_node *pos; + unsigned bucket; + + bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS); + hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) { + if (abe->mapping == mapping) + return; + } + + abe = mempool_alloc(abe_pool, GFP_KERNEL); + BUG_ON(!igrab(mapping->host)); + abe->mapping = mapping; + hlist_add_head(&abe->list, &batch_hash[bucket]); + return; +} + +static void aio_batch_free(struct hlist_head *batch_hash) { - struct kiocb *iocb = container_of(wait, struct kiocb, ki_wait); + struct aio_batch_entry *abe; + struct hlist_node *pos, *n; + int i; - list_del_init(&wait->task_list); - kick_iocb(iocb); - return 1; + for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) { + hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) { + blk_run_address_space(abe->mapping); + iput(abe->mapping->host); + hlist_del(&abe->list); + mempool_free(abe, abe_pool); + } + } } static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, - struct iocb *iocb) + struct iocb *iocb, struct hlist_head *batch_hash) { struct kiocb *req; struct file *file; @@ -1592,8 +1608,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf; req->ki_left = req->ki_nbytes = iocb->aio_nbytes; req->ki_opcode = iocb->aio_lio_opcode; - init_waitqueue_func_entry(&req->ki_wait, aio_wake_function); - INIT_LIST_HEAD(&req->ki_wait.task_list); ret = aio_setup_iocb(req); @@ -1608,6 +1622,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, ; } spin_unlock_irq(&ctx->ctx_lock); + if (req->ki_opcode == IOCB_CMD_PREAD || + req->ki_opcode == IOCB_CMD_PREADV || + req->ki_opcode == IOCB_CMD_PWRITE || + req->ki_opcode == IOCB_CMD_PWRITEV) + aio_batch_add(file->f_mapping, batch_hash); + aio_put_req(req); /* drop extra ref to req */ return 0; @@ -1635,6 +1655,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, struct kioctx *ctx; long ret = 0; int i; + struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, }; if (unlikely(nr < 0)) return -EINVAL; @@ -1666,10 +1687,11 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, break; } - ret = io_submit_one(ctx, user_iocb, &tmp); + ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash); if (ret) break; } + aio_batch_free(batch_hash); put_ioctx(ctx); return i ? i : ret; |