From 1312f40e11c57edb5c3250f1b782cef8e3efea82 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Mar 2006 11:02:03 -0500 Subject: [PATCH] regularize blk_cleanup_queue() use Signed-off-by: Al Viro --- drivers/md/dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 745ca1f67b14..88d60202b9db 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -840,7 +840,7 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent) bad3: mempool_destroy(md->io_pool); bad2: - blk_put_queue(md->queue); + blk_cleanup_queue(md->queue); free_minor(minor); bad1: kfree(md); @@ -860,7 +860,7 @@ static void free_dev(struct mapped_device *md) del_gendisk(md->disk); free_minor(minor); put_disk(md->disk); - blk_put_queue(md->queue); + blk_cleanup_queue(md->queue); kfree(md); } -- cgit v1.2.3 From d2044a94e80b61f68ee7456f82d9b443e9ff6ac7 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Wed, 22 Mar 2006 00:07:42 -0800 Subject: [PATCH] dm: bio split bvec fix The code that handles bios that span table target boundaries by breaking them up into smaller bios will not split an individual struct bio_vec into more than two pieces. Sometimes more than that are required. This patch adds a loop to break the second piece up into as many pieces as are necessary. Cc: "Abhishek Gupta" Cc: Dan Smith Signed-off-by: Alasdair G Kergon Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/dm.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 88d60202b9db..26b08ee425c7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -533,30 +533,35 @@ static void __clone_and_map(struct clone_info *ci) } else { /* - * Create two copy bios to deal with io that has - * been split across a target. + * Handle a bvec that must be split between two or more targets. */ struct bio_vec *bv = bio->bi_io_vec + ci->idx; + sector_t remaining = to_sector(bv->bv_len); + unsigned int offset = 0; - clone = split_bvec(bio, ci->sector, ci->idx, - bv->bv_offset, max); - __map_bio(ti, clone, tio); + do { + if (offset) { + ti = dm_table_find_target(ci->map, ci->sector); + max = max_io_len(ci->md, ci->sector, ti); - ci->sector += max; - ci->sector_count -= max; - ti = dm_table_find_target(ci->map, ci->sector); - - len = to_sector(bv->bv_len) - max; - clone = split_bvec(bio, ci->sector, ci->idx, - bv->bv_offset + to_bytes(max), len); - tio = alloc_tio(ci->md); - tio->io = ci->io; - tio->ti = ti; - memset(&tio->info, 0, sizeof(tio->info)); - __map_bio(ti, clone, tio); + tio = alloc_tio(ci->md); + tio->io = ci->io; + tio->ti = ti; + memset(&tio->info, 0, sizeof(tio->info)); + } + + len = min(remaining, max); + + clone = split_bvec(bio, ci->sector, ci->idx, + bv->bv_offset + offset, len); + + __map_bio(ti, clone, tio); + + ci->sector += len; + ci->sector_count -= len; + offset += to_bytes(len); + } while (remaining -= len); - ci->sector += len; - ci->sector_count -= len; ci->idx++; } } -- cgit v1.2.3 From 2056a782f8e7e65fd4bfd027506b4ce1c5e9ccd4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 23 Mar 2006 20:00:26 +0100 Subject: [PATCH] Block queue IO tracing support (blktrace) as of 2006-03-23 Signed-off-by: Jens Axboe --- block/Kconfig | 12 + block/Makefile | 2 + block/blktrace.c | 538 +++++++++++++++++++++++++++++++++++++++++++ block/elevator.c | 4 + block/ioctl.c | 6 + block/ll_rw_blk.c | 44 +++- drivers/block/cciss.c | 2 + drivers/md/dm.c | 13 +- fs/bio.c | 4 + fs/compat_ioctl.c | 1 + include/linux/blkdev.h | 3 + include/linux/blktrace_api.h | 277 ++++++++++++++++++++++ include/linux/compat_ioctl.h | 4 + include/linux/fs.h | 4 + include/linux/sched.h | 1 + kernel/fork.c | 1 + mm/highmem.c | 3 + 17 files changed, 916 insertions(+), 3 deletions(-) create mode 100644 block/blktrace.c create mode 100644 include/linux/blktrace_api.h (limited to 'drivers/md/dm.c') diff --git a/block/Kconfig b/block/Kconfig index 377f6dd20e17..96783645092d 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -11,4 +11,16 @@ config LBD your machine, or if you want to have a raid or loopback device bigger than 2TB. Otherwise say N. +config BLK_DEV_IO_TRACE + bool "Support for tracing block io actions" + select RELAY + select DEBUG_FS + help + Say Y here, if you want to be able to trace the block layer actions + on a given queue. Tracing allows you to see any traffic happening + on a block device queue. For more information (and the user space + support tools needed), fetch the blktrace app from: + + git://brick.kernel.dk/data/git/blktrace.git + source block/Kconfig.iosched diff --git a/block/Makefile b/block/Makefile index 7e4f93e2b44e..c05de0e0037f 100644 --- a/block/Makefile +++ b/block/Makefile @@ -8,3 +8,5 @@ obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_AS) += as-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o + +obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o diff --git a/block/blktrace.c b/block/blktrace.c new file mode 100644 index 000000000000..36f3a172275f --- /dev/null +++ b/block/blktrace.c @@ -0,0 +1,538 @@ +/* + * Copyright (C) 2006 Jens Axboe + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_PER_CPU(unsigned long long, blk_trace_cpu_offset) = { 0, }; +static unsigned int blktrace_seq __read_mostly = 1; + +/* + * Send out a notify for this process, if we haven't done so since a trace + * started + */ +static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk) +{ + struct blk_io_trace *t; + + t = relay_reserve(bt->rchan, sizeof(*t) + sizeof(tsk->comm)); + if (t) { + t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; + t->device = bt->dev; + t->action = BLK_TC_ACT(BLK_TC_NOTIFY); + t->pid = tsk->pid; + t->cpu = smp_processor_id(); + t->pdu_len = sizeof(tsk->comm); + memcpy((void *) t + sizeof(*t), tsk->comm, t->pdu_len); + tsk->btrace_seq = blktrace_seq; + } +} + +static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, + pid_t pid) +{ + if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) + return 1; + if (sector < bt->start_lba || sector > bt->end_lba) + return 1; + if (bt->pid && pid != bt->pid) + return 1; + + return 0; +} + +/* + * Data direction bit lookup + */ +static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; + +/* + * Bio action bits of interest + */ +static u32 bio_act[3] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC) }; + +/* + * More could be added as needed, taking care to increment the decrementer + * to get correct indexing + */ +#define trace_barrier_bit(rw) \ + (((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0)) +#define trace_sync_bit(rw) \ + (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1)) + +/* + * The worker for the various blk_add_trace*() types. Fills out a + * blk_io_trace structure and places it in a per-cpu subbuffer. + */ +void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, + int rw, u32 what, int error, int pdu_len, void *pdu_data) +{ + struct task_struct *tsk = current; + struct blk_io_trace *t; + unsigned long flags; + unsigned long *sequence; + pid_t pid; + int cpu; + + if (unlikely(bt->trace_state != Blktrace_running)) + return; + + what |= ddir_act[rw & WRITE]; + what |= bio_act[trace_barrier_bit(rw)]; + what |= bio_act[trace_sync_bit(rw)]; + + pid = tsk->pid; + if (unlikely(act_log_check(bt, what, sector, pid))) + return; + + /* + * A word about the locking here - we disable interrupts to reserve + * some space in the relay per-cpu buffer, to prevent an irq + * from coming in and stepping on our toes. Once reserved, it's + * enough to get preemption disabled to prevent read of this data + * before we are through filling it. get_cpu()/put_cpu() does this + * for us + */ + local_irq_save(flags); + + if (unlikely(tsk->btrace_seq != blktrace_seq)) + trace_note_tsk(bt, tsk); + + t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len); + if (t) { + cpu = smp_processor_id(); + sequence = per_cpu_ptr(bt->sequence, cpu); + + t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; + t->sequence = ++(*sequence); + t->time = sched_clock() - per_cpu(blk_trace_cpu_offset, cpu); + t->sector = sector; + t->bytes = bytes; + t->action = what; + t->pid = pid; + t->device = bt->dev; + t->cpu = cpu; + t->error = error; + t->pdu_len = pdu_len; + + if (pdu_len) + memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); + } + + local_irq_restore(flags); +} + +EXPORT_SYMBOL_GPL(__blk_add_trace); + +static struct dentry *blk_tree_root; +static struct mutex blk_tree_mutex; +static unsigned int root_users; + +static inline void blk_remove_root(void) +{ + if (blk_tree_root) { + debugfs_remove(blk_tree_root); + blk_tree_root = NULL; + } +} + +static void blk_remove_tree(struct dentry *dir) +{ + mutex_lock(&blk_tree_mutex); + debugfs_remove(dir); + if (--root_users == 0) + blk_remove_root(); + mutex_unlock(&blk_tree_mutex); +} + +static struct dentry *blk_create_tree(const char *blk_name) +{ + struct dentry *dir = NULL; + + mutex_lock(&blk_tree_mutex); + + if (!blk_tree_root) { + blk_tree_root = debugfs_create_dir("block", NULL); + if (!blk_tree_root) + goto err; + } + + dir = debugfs_create_dir(blk_name, blk_tree_root); + if (dir) + root_users++; + else + blk_remove_root(); + +err: + mutex_unlock(&blk_tree_mutex); + return dir; +} + +static void blk_trace_cleanup(struct blk_trace *bt) +{ + relay_close(bt->rchan); + debugfs_remove(bt->dropped_file); + blk_remove_tree(bt->dir); + free_percpu(bt->sequence); + kfree(bt); +} + +static int blk_trace_remove(request_queue_t *q) +{ + struct blk_trace *bt; + + bt = xchg(&q->blk_trace, NULL); + if (!bt) + return -EINVAL; + + if (bt->trace_state == Blktrace_setup || + bt->trace_state == Blktrace_stopped) + blk_trace_cleanup(bt); + + return 0; +} + +static int blk_dropped_open(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->u.generic_ip; + + return 0; +} + +static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct blk_trace *bt = filp->private_data; + char buf[16]; + + snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped)); + + return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); +} + +static struct file_operations blk_dropped_fops = { + .owner = THIS_MODULE, + .open = blk_dropped_open, + .read = blk_dropped_read, +}; + +/* + * Keep track of how many times we encountered a full subbuffer, to aid + * the user space app in telling how many lost events there were. + */ +static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, + void *prev_subbuf, size_t prev_padding) +{ + struct blk_trace *bt; + + if (!relay_buf_full(buf)) + return 1; + + bt = buf->chan->private_data; + atomic_inc(&bt->dropped); + return 0; +} + +static int blk_remove_buf_file_callback(struct dentry *dentry) +{ + debugfs_remove(dentry); + return 0; +} + +static struct dentry *blk_create_buf_file_callback(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf, + int *is_global) +{ + return debugfs_create_file(filename, mode, parent, buf, + &relay_file_operations); +} + +static struct rchan_callbacks blk_relay_callbacks = { + .subbuf_start = blk_subbuf_start_callback, + .create_buf_file = blk_create_buf_file_callback, + .remove_buf_file = blk_remove_buf_file_callback, +}; + +/* + * Setup everything required to start tracing + */ +static int blk_trace_setup(request_queue_t *q, struct block_device *bdev, + char __user *arg) +{ + struct blk_user_trace_setup buts; + struct blk_trace *old_bt, *bt = NULL; + struct dentry *dir = NULL; + char b[BDEVNAME_SIZE]; + int ret, i; + + if (copy_from_user(&buts, arg, sizeof(buts))) + return -EFAULT; + + if (!buts.buf_size || !buts.buf_nr) + return -EINVAL; + + strcpy(buts.name, bdevname(bdev, b)); + + /* + * some device names have larger paths - convert the slashes + * to underscores for this to work as expected + */ + for (i = 0; i < strlen(buts.name); i++) + if (buts.name[i] == '/') + buts.name[i] = '_'; + + if (copy_to_user(arg, &buts, sizeof(buts))) + return -EFAULT; + + ret = -ENOMEM; + bt = kzalloc(sizeof(*bt), GFP_KERNEL); + if (!bt) + goto err; + + bt->sequence = alloc_percpu(unsigned long); + if (!bt->sequence) + goto err; + + ret = -ENOENT; + dir = blk_create_tree(buts.name); + if (!dir) + goto err; + + bt->dir = dir; + bt->dev = bdev->bd_dev; + atomic_set(&bt->dropped, 0); + + ret = -EIO; + bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops); + if (!bt->dropped_file) + goto err; + + bt->rchan = relay_open("trace", dir, buts.buf_size, buts.buf_nr, &blk_relay_callbacks); + if (!bt->rchan) + goto err; + bt->rchan->private_data = bt; + + bt->act_mask = buts.act_mask; + if (!bt->act_mask) + bt->act_mask = (u16) -1; + + bt->start_lba = buts.start_lba; + bt->end_lba = buts.end_lba; + if (!bt->end_lba) + bt->end_lba = -1ULL; + + bt->pid = buts.pid; + bt->trace_state = Blktrace_setup; + + ret = -EBUSY; + old_bt = xchg(&q->blk_trace, bt); + if (old_bt) { + (void) xchg(&q->blk_trace, old_bt); + goto err; + } + + return 0; +err: + if (dir) + blk_remove_tree(dir); + if (bt) { + if (bt->dropped_file) + debugfs_remove(bt->dropped_file); + if (bt->sequence) + free_percpu(bt->sequence); + if (bt->rchan) + relay_close(bt->rchan); + kfree(bt); + } + return ret; +} + +static int blk_trace_startstop(request_queue_t *q, int start) +{ + struct blk_trace *bt; + int ret; + + if ((bt = q->blk_trace) == NULL) + return -EINVAL; + + /* + * For starting a trace, we can transition from a setup or stopped + * trace. For stopping a trace, the state must be running + */ + ret = -EINVAL; + if (start) { + if (bt->trace_state == Blktrace_setup || + bt->trace_state == Blktrace_stopped) { + blktrace_seq++; + smp_mb(); + bt->trace_state = Blktrace_running; + ret = 0; + } + } else { + if (bt->trace_state == Blktrace_running) { + bt->trace_state = Blktrace_stopped; + relay_flush(bt->rchan); + ret = 0; + } + } + + return ret; +} + +/** + * blk_trace_ioctl: - handle the ioctls associated with tracing + * @bdev: the block device + * @cmd: the ioctl cmd + * @arg: the argument data, if any + * + **/ +int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) +{ + request_queue_t *q; + int ret, start = 0; + + q = bdev_get_queue(bdev); + if (!q) + return -ENXIO; + + mutex_lock(&bdev->bd_mutex); + + switch (cmd) { + case BLKTRACESETUP: + ret = blk_trace_setup(q, bdev, arg); + break; + case BLKTRACESTART: + start = 1; + case BLKTRACESTOP: + ret = blk_trace_startstop(q, start); + break; + case BLKTRACETEARDOWN: + ret = blk_trace_remove(q); + break; + default: + ret = -ENOTTY; + break; + } + + mutex_unlock(&bdev->bd_mutex); + return ret; +} + +/** + * blk_trace_shutdown: - stop and cleanup trace structures + * @q: the request queue associated with the device + * + **/ +void blk_trace_shutdown(request_queue_t *q) +{ + blk_trace_startstop(q, 0); + blk_trace_remove(q); +} + +/* + * Average offset over two calls to sched_clock() with a gettimeofday() + * in the middle + */ +static void blk_check_time(unsigned long long *t) +{ + unsigned long long a, b; + struct timeval tv; + + a = sched_clock(); + do_gettimeofday(&tv); + b = sched_clock(); + + *t = tv.tv_sec * 1000000000 + tv.tv_usec * 1000; + *t -= (a + b) / 2; +} + +static void blk_trace_check_cpu_time(void *data) +{ + unsigned long long *t; + int cpu = get_cpu(); + + t = &per_cpu(blk_trace_cpu_offset, cpu); + + /* + * Just call it twice, hopefully the second call will be cache hot + * and a little more precise + */ + blk_check_time(t); + blk_check_time(t); + + put_cpu(); +} + +/* + * Call blk_trace_check_cpu_time() on each CPU to calibrate our inter-CPU + * timings + */ +static void blk_trace_calibrate_offsets(void) +{ + unsigned long flags; + + smp_call_function(blk_trace_check_cpu_time, NULL, 1, 1); + local_irq_save(flags); + blk_trace_check_cpu_time(NULL); + local_irq_restore(flags); +} + +static void blk_trace_set_ht_offsets(void) +{ +#if defined(CONFIG_SCHED_SMT) + int cpu, i; + + /* + * now make sure HT siblings have the same time offset + */ + preempt_disable(); + for_each_online_cpu(cpu) { + unsigned long long *cpu_off, *sibling_off; + + for_each_cpu_mask(i, cpu_sibling_map[cpu]) { + if (i == cpu) + continue; + + cpu_off = &per_cpu(blk_trace_cpu_offset, cpu); + sibling_off = &per_cpu(blk_trace_cpu_offset, i); + *sibling_off = *cpu_off; + } + } + preempt_enable(); +#endif +} + +static __init int blk_trace_init(void) +{ + mutex_init(&blk_tree_mutex); + blk_trace_calibrate_offsets(); + blk_trace_set_ht_offsets(); + + return 0; +} + +module_init(blk_trace_init); + diff --git a/block/elevator.c b/block/elevator.c index db3d0d8296a0..5e558c4689a4 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -333,6 +334,8 @@ void elv_insert(request_queue_t *q, struct request *rq, int where) struct list_head *pos; unsigned ordseq; + blk_add_trace_rq(q, rq, BLK_TA_INSERT); + rq->q = q; switch (where) { @@ -499,6 +502,7 @@ struct request *elv_next_request(request_queue_t *q) * not be passed by new incoming requests */ rq->flags |= REQ_STARTED; + blk_add_trace_rq(q, rq, BLK_TA_ISSUE); } if (!q->boundary_rq || q->boundary_rq == rq) { diff --git a/block/ioctl.c b/block/ioctl.c index 35fdb7dc6512..9cfa2e1ecb24 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -5,6 +5,7 @@ #include #include #include +#include #include static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg) @@ -189,6 +190,11 @@ static int blkdev_locked_ioctl(struct file *file, struct block_device *bdev, return put_ulong(arg, bdev->bd_inode->i_size >> 9); case BLKGETSIZE64: return put_u64(arg, bdev->bd_inode->i_size); + case BLKTRACESTART: + case BLKTRACESTOP: + case BLKTRACESETUP: + case BLKTRACETEARDOWN: + return blk_trace_ioctl(bdev, cmd, (char __user *) arg); } return -ENOIOCTLCMD; } diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 6c793b196aa9..062067fa7ead 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -28,6 +28,7 @@ #include #include #include +#include /* * for max sense size @@ -1556,8 +1557,10 @@ void blk_plug_device(request_queue_t *q) if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) return; - if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) + if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); + blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); + } } EXPORT_SYMBOL(blk_plug_device); @@ -1621,14 +1624,21 @@ static void blk_backing_dev_unplug(struct backing_dev_info *bdi, /* * devices don't necessarily have an ->unplug_fn defined */ - if (q->unplug_fn) + if (q->unplug_fn) { + blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, + q->rq.count[READ] + q->rq.count[WRITE]); + q->unplug_fn(q); + } } static void blk_unplug_work(void *data) { request_queue_t *q = data; + blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, + q->rq.count[READ] + q->rq.count[WRITE]); + q->unplug_fn(q); } @@ -1636,6 +1646,9 @@ static void blk_unplug_timeout(unsigned long data) { request_queue_t *q = (request_queue_t *)data; + blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, + q->rq.count[READ] + q->rq.count[WRITE]); + kblockd_schedule_work(&q->unplug_work); } @@ -1753,6 +1766,9 @@ static void blk_release_queue(struct kobject *kobj) if (q->queue_tags) __blk_queue_free_tags(q); + if (q->blk_trace) + blk_trace_shutdown(q); + kmem_cache_free(requestq_cachep, q); } @@ -2129,6 +2145,8 @@ rq_starved: rq_init(q, rq); rq->rl = rl; + + blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); out: return rq; } @@ -2157,6 +2175,8 @@ static struct request *get_request_wait(request_queue_t *q, int rw, if (!rq) { struct io_context *ioc; + blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); + __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); io_schedule(); @@ -2210,6 +2230,8 @@ EXPORT_SYMBOL(blk_get_request); */ void blk_requeue_request(request_queue_t *q, struct request *rq) { + blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); + if (blk_rq_tagged(rq)) blk_queue_end_tag(q, rq); @@ -2844,6 +2866,8 @@ static int __make_request(request_queue_t *q, struct bio *bio) if (!q->back_merge_fn(q, req, bio)) break; + blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); + req->biotail->bi_next = bio; req->biotail = bio; req->nr_sectors = req->hard_nr_sectors += nr_sectors; @@ -2859,6 +2883,8 @@ static int __make_request(request_queue_t *q, struct bio *bio) if (!q->front_merge_fn(q, req, bio)) break; + blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); + bio->bi_next = req->bio; req->bio = bio; @@ -2976,6 +3002,7 @@ void generic_make_request(struct bio *bio) request_queue_t *q; sector_t maxsector; int ret, nr_sectors = bio_sectors(bio); + dev_t old_dev; might_sleep(); /* Test device or partition size, when known. */ @@ -3002,6 +3029,8 @@ void generic_make_request(struct bio *bio) * NOTE: we don't repeat the blk_size check for each new device. * Stacking drivers are expected to know what they are doing. */ + maxsector = -1; + old_dev = 0; do { char b[BDEVNAME_SIZE]; @@ -3034,6 +3063,15 @@ end_io: */ blk_partition_remap(bio); + if (maxsector != -1) + blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, + maxsector); + + blk_add_trace_bio(q, bio, BLK_TA_QUEUE); + + maxsector = bio->bi_sector; + old_dev = bio->bi_bdev->bd_dev; + ret = q->make_request_fn(q, bio); } while (ret); } @@ -3153,6 +3191,8 @@ static int __end_that_request_first(struct request *req, int uptodate, int total_bytes, bio_nbytes, error, next_idx = 0; struct bio *bio; + blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); + /* * extend uptodate bool to allow < 0 value to be direct io error */ diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index e29b8926f80e..1f2890989b56 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -2331,6 +2332,7 @@ static inline void complete_command( ctlr_info_t *h, CommandList_struct *cmd, cmd->rq->completion_data = cmd; cmd->rq->errors = status; + blk_add_trace_rq(cmd->rq->q, cmd->rq, BLK_TA_COMPLETE); blk_complete_request(cmd->rq); } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 26b08ee425c7..8c82373f7ff3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -17,6 +17,7 @@ #include #include #include +#include static const char *_name = DM_NAME; @@ -334,6 +335,8 @@ static void dec_pending(struct dm_io *io, int error) /* nudge anyone waiting on suspend queue */ wake_up(&io->md->wait); + blk_add_trace_bio(io->md->queue, io->bio, BLK_TA_COMPLETE); + bio_endio(io->bio, io->bio->bi_size, io->error); free_io(io->md, io); } @@ -392,6 +395,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, struct target_io *tio) { int r; + sector_t sector; /* * Sanity checks. @@ -407,10 +411,17 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, * this io. */ atomic_inc(&tio->io->io_count); + sector = clone->bi_sector; r = ti->type->map(ti, clone, &tio->info); - if (r > 0) + if (r > 0) { /* the bio has been remapped so dispatch it */ + + blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone, + tio->io->bio->bi_bdev->bd_dev, sector, + clone->bi_sector); + generic_make_request(clone); + } else if (r < 0) { /* error the io and bail out */ diff --git a/fs/bio.c b/fs/bio.c index 8f1d2e815c96..0a8c59cb68f5 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -25,6 +25,7 @@ #include #include #include +#include #include /* for struct sg_iovec */ #define BIO_POOL_SIZE 256 @@ -1095,6 +1096,9 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) if (!bp) return bp; + blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi, + bi->bi_sector + first_sectors); + BUG_ON(bi->bi_vcnt != 1); BUG_ON(bi->bi_idx != 0); atomic_set(&bp->cnt, 3); diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c666769a875d..7c031f00fd79 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -72,6 +72,7 @@ #include #include #include +#include #include /* siocdevprivate_ioctl */ #include diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 56bb6a4e15f3..c179966f1a2f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -22,6 +22,7 @@ typedef struct request_queue request_queue_t; struct elevator_queue; typedef struct elevator_queue elevator_t; struct request_pm_state; +struct blk_trace; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -416,6 +417,8 @@ struct request_queue unsigned int sg_reserved_size; int node; + struct blk_trace *blk_trace; + /* * reserved for flush operations */ diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h new file mode 100644 index 000000000000..b34d3e73d5ea --- /dev/null +++ b/include/linux/blktrace_api.h @@ -0,0 +1,277 @@ +#ifndef BLKTRACE_H +#define BLKTRACE_H + +#include +#include +#include + +/* + * Trace categories + */ +enum blktrace_cat { + BLK_TC_READ = 1 << 0, /* reads */ + BLK_TC_WRITE = 1 << 1, /* writes */ + BLK_TC_BARRIER = 1 << 2, /* barrier */ + BLK_TC_SYNC = 1 << 3, /* barrier */ + BLK_TC_QUEUE = 1 << 4, /* queueing/merging */ + BLK_TC_REQUEUE = 1 << 5, /* requeueing */ + BLK_TC_ISSUE = 1 << 6, /* issue */ + BLK_TC_COMPLETE = 1 << 7, /* completions */ + BLK_TC_FS = 1 << 8, /* fs requests */ + BLK_TC_PC = 1 << 9, /* pc requests */ + BLK_TC_NOTIFY = 1 << 10, /* special message */ + + BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ +}; + +#define BLK_TC_SHIFT (16) +#define BLK_TC_ACT(act) ((act) << BLK_TC_SHIFT) + +/* + * Basic trace actions + */ +enum blktrace_act { + __BLK_TA_QUEUE = 1, /* queued */ + __BLK_TA_BACKMERGE, /* back merged to existing rq */ + __BLK_TA_FRONTMERGE, /* front merge to existing rq */ + __BLK_TA_GETRQ, /* allocated new request */ + __BLK_TA_SLEEPRQ, /* sleeping on rq allocation */ + __BLK_TA_REQUEUE, /* request requeued */ + __BLK_TA_ISSUE, /* sent to driver */ + __BLK_TA_COMPLETE, /* completed by driver */ + __BLK_TA_PLUG, /* queue was plugged */ + __BLK_TA_UNPLUG_IO, /* queue was unplugged by io */ + __BLK_TA_UNPLUG_TIMER, /* queue was unplugged by timer */ + __BLK_TA_INSERT, /* insert request */ + __BLK_TA_SPLIT, /* bio was split */ + __BLK_TA_BOUNCE, /* bio was bounced */ + __BLK_TA_REMAP, /* bio was remapped */ +}; + +/* + * Trace actions in full. Additionally, read or write is masked + */ +#define BLK_TA_QUEUE (__BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_BACKMERGE (__BLK_TA_BACKMERGE | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_FRONTMERGE (__BLK_TA_FRONTMERGE | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_GETRQ (__BLK_TA_GETRQ | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_SLEEPRQ (__BLK_TA_SLEEPRQ | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_REQUEUE (__BLK_TA_REQUEUE | BLK_TC_ACT(BLK_TC_REQUEUE)) +#define BLK_TA_ISSUE (__BLK_TA_ISSUE | BLK_TC_ACT(BLK_TC_ISSUE)) +#define BLK_TA_COMPLETE (__BLK_TA_COMPLETE| BLK_TC_ACT(BLK_TC_COMPLETE)) +#define BLK_TA_PLUG (__BLK_TA_PLUG | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_UNPLUG_IO (__BLK_TA_UNPLUG_IO | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_UNPLUG_TIMER (__BLK_TA_UNPLUG_TIMER | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_INSERT (__BLK_TA_INSERT | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_SPLIT (__BLK_TA_SPLIT) +#define BLK_TA_BOUNCE (__BLK_TA_BOUNCE) +#define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE)) + +#define BLK_IO_TRACE_MAGIC 0x65617400 +#define BLK_IO_TRACE_VERSION 0x07 + +/* + * The trace itself + */ +struct blk_io_trace { + u32 magic; /* MAGIC << 8 | version */ + u32 sequence; /* event number */ + u64 time; /* in microseconds */ + u64 sector; /* disk offset */ + u32 bytes; /* transfer length */ + u32 action; /* what happened */ + u32 pid; /* who did it */ + u32 device; /* device number */ + u32 cpu; /* on what cpu did it happen */ + u16 error; /* completion error */ + u16 pdu_len; /* length of data after this trace */ +}; + +/* + * The remap event + */ +struct blk_io_trace_remap { + u32 device; + u32 __pad; + u64 sector; +}; + +enum { + Blktrace_setup = 1, + Blktrace_running, + Blktrace_stopped, +}; + +struct blk_trace { + int trace_state; + struct rchan *rchan; + unsigned long *sequence; + u16 act_mask; + u64 start_lba; + u64 end_lba; + u32 pid; + u32 dev; + struct dentry *dir; + struct dentry *dropped_file; + atomic_t dropped; +}; + +/* + * User setup structure passed with BLKTRACESTART + */ +struct blk_user_trace_setup { + char name[BDEVNAME_SIZE]; /* output */ + u16 act_mask; /* input */ + u32 buf_size; /* input */ + u32 buf_nr; /* input */ + u64 start_lba; + u64 end_lba; + u32 pid; +}; + +#if defined(CONFIG_BLK_DEV_IO_TRACE) +extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); +extern void blk_trace_shutdown(request_queue_t *); +extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *); + +/** + * blk_add_trace_rq - Add a trace for a request oriented action + * @q: queue the io is for + * @rq: the source request + * @what: the action + * + * Description: + * Records an action against a request. Will log the bio offset + size. + * + **/ +static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq, + u32 what) +{ + struct blk_trace *bt = q->blk_trace; + int rw = rq->flags & 0x07; + + if (likely(!bt)) + return; + + if (blk_pc_request(rq)) { + what |= BLK_TC_ACT(BLK_TC_PC); + __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd); + } else { + what |= BLK_TC_ACT(BLK_TC_FS); + __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL); + } +} + +/** + * blk_add_trace_bio - Add a trace for a bio oriented action + * @q: queue the io is for + * @bio: the source bio + * @what: the action + * + * Description: + * Records an action against a bio. Will log the bio offset + size. + * + **/ +static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio, + u32 what) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL); +} + +/** + * blk_add_trace_generic - Add a trace for a generic action + * @q: queue the io is for + * @bio: the source bio + * @rw: the data direction + * @what: the action + * + * Description: + * Records a simple trace + * + **/ +static inline void blk_add_trace_generic(struct request_queue *q, + struct bio *bio, int rw, u32 what) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + if (bio) + blk_add_trace_bio(q, bio, what); + else + __blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL); +} + +/** + * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload + * @q: queue the io is for + * @what: the action + * @bio: the source bio + * @pdu: the integer payload + * + * Description: + * Adds a trace with some integer payload. This might be an unplug + * option given as the action, with the depth at unplug time given + * as the payload + * + **/ +static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what, + struct bio *bio, unsigned int pdu) +{ + struct blk_trace *bt = q->blk_trace; + u64 rpdu = cpu_to_be64(pdu); + + if (likely(!bt)) + return; + + if (bio) + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu); + else + __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu); +} + +/** + * blk_add_trace_remap - Add a trace for a remap operation + * @q: queue the io is for + * @bio: the source bio + * @dev: target device + * @from: source sector + * @to: target sector + * + * Description: + * Device mapper or raid target sometimes need to split a bio because + * it spans a stripe (or similar). Add a trace for that action. + * + **/ +static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio, + dev_t dev, sector_t from, sector_t to) +{ + struct blk_trace *bt = q->blk_trace; + struct blk_io_trace_remap r; + + if (likely(!bt)) + return; + + r.device = cpu_to_be32(dev); + r.sector = cpu_to_be64(to); + + __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); +} + +#else /* !CONFIG_BLK_DEV_IO_TRACE */ +#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) +#define blk_trace_shutdown(q) do { } while (0) +#define blk_add_trace_rq(q, rq, what) do { } while (0) +#define blk_add_trace_bio(q, rq, what) do { } while (0) +#define blk_add_trace_generic(q, rq, rw, what) do { } while (0) +#define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0) +#define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0) +#endif /* CONFIG_BLK_DEV_IO_TRACE */ + +#endif diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index ae7dfb790df3..efb518f16bb3 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -97,6 +97,10 @@ COMPATIBLE_IOCTL(BLKRRPART) COMPATIBLE_IOCTL(BLKFLSBUF) COMPATIBLE_IOCTL(BLKSECTSET) COMPATIBLE_IOCTL(BLKSSZGET) +COMPATIBLE_IOCTL(BLKTRACESTART) +COMPATIBLE_IOCTL(BLKTRACESTOP) +COMPATIBLE_IOCTL(BLKTRACESETUP) +COMPATIBLE_IOCTL(BLKTRACETEARDOWN) ULONG_IOCTL(BLKRASET) ULONG_IOCTL(BLKFRASET) /* RAID */ diff --git a/include/linux/fs.h b/include/linux/fs.h index f9c9dea636d0..9b34a1b03455 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -197,6 +197,10 @@ extern int dir_notify_enable; #define BLKBSZGET _IOR(0x12,112,size_t) #define BLKBSZSET _IOW(0x12,113,size_t) #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ +#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup) +#define BLKTRACESTART _IO(0x12,116) +#define BLKTRACESTOP _IO(0x12,117) +#define BLKTRACETEARDOWN _IO(0x12,118) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 62e6314382f0..e60a91d5b369 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -706,6 +706,7 @@ struct task_struct { prio_array_t *array; unsigned short ioprio; + unsigned int btrace_seq; unsigned long sleep_avg; unsigned long long timestamp, last_ran; diff --git a/kernel/fork.c b/kernel/fork.c index c79ae0b19a49..c21bae8c93b9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -181,6 +181,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) /* One for us, one for whoever does the "release_task()" (usually parent) */ atomic_set(&tsk->usage,2); atomic_set(&tsk->fs_excl, 0); + tsk->btrace_seq = 0; return tsk; } diff --git a/mm/highmem.c b/mm/highmem.c index ce2e7e8bbfa7..d0ea1eec6a9a 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -26,6 +26,7 @@ #include #include #include +#include #include static mempool_t *page_pool, *isa_page_pool; @@ -483,6 +484,8 @@ void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig) pool = isa_page_pool; } + blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE); + /* * slow path */ -- cgit v1.2.3 From 93d2341c750cda0df48a6cc67b35fe25f1ec47df Mon Sep 17 00:00:00 2001 From: Matthew Dobson Date: Sun, 26 Mar 2006 01:37:50 -0800 Subject: [PATCH] mempool: use mempool_create_slab_pool() Modify well over a dozen mempool users to call mempool_create_slab_pool() rather than calling mempool_create() with extra arguments, saving about 30 lines of code and increasing readability. Signed-off-by: Matthew Dobson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/cfq-iosched.c | 2 +- drivers/block/aoe/aoeblk.c | 4 +--- drivers/md/dm-crypt.c | 3 +-- drivers/md/dm-mpath.c | 3 +-- drivers/md/dm-snap.c | 3 +-- drivers/md/dm.c | 6 ++---- drivers/md/kcopyd.c | 3 +-- drivers/message/i2o/i2o_block.c | 7 +++---- drivers/scsi/iscsi_tcp.c | 4 ++-- drivers/scsi/qla2xxx/qla_os.c | 3 +-- drivers/scsi/scsi_lib.c | 5 ++--- fs/bio.c | 7 ++----- fs/cifs/cifsfs.c | 18 ++++++------------ fs/jfs/jfs_metapage.c | 4 ++-- fs/nfs/read.c | 6 ++---- fs/nfs/write.c | 12 ++++-------- fs/xfs/linux-2.6/xfs_super.c | 5 ++--- include/linux/i2o.h | 4 +--- net/sunrpc/sched.c | 12 ++++-------- 19 files changed, 39 insertions(+), 72 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index c4a0d5d8d7f0..bde40a6ae665 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2191,7 +2191,7 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e) if (!cfqd->cfq_hash) goto out_cfqhash; - cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool); + cfqd->crq_pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, crq_pool); if (!cfqd->crq_pool) goto out_crqpool; diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 32fea55fac48..393b86a3dbf8 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -211,9 +211,7 @@ aoeblk_gdalloc(void *vp) return; } - d->bufpool = mempool_create(MIN_BUFS, - mempool_alloc_slab, mempool_free_slab, - buf_pool_cache); + d->bufpool = mempool_create_slab_pool(MIN_BUFS, buf_pool_cache); if (d->bufpool == NULL) { printk(KERN_ERR "aoe: aoeblk_gdalloc: cannot allocate bufpool " "for %ld.%ld\n", d->aoemajor, d->aoeminor); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d88b8eda3903..259e86f26549 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -616,8 +616,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } } - cc->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab, - mempool_free_slab, _crypt_io_pool); + cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool); if (!cc->io_pool) { ti->error = PFX "Cannot allocate crypt io mempool"; goto bad3; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index f72a82fb9434..1816f30678ed 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -179,8 +179,7 @@ static struct multipath *alloc_multipath(void) m->queue_io = 1; INIT_WORK(&m->process_queued_ios, process_queued_ios, m); INIT_WORK(&m->trigger_event, trigger_event, m); - m->mpio_pool = mempool_create(MIN_IOS, mempool_alloc_slab, - mempool_free_slab, _mpio_cache); + m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache); if (!m->mpio_pool) { kfree(m); return NULL; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index f3759dd7828e..7401540086df 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1174,8 +1174,7 @@ static int __init dm_snapshot_init(void) goto bad4; } - pending_pool = mempool_create(128, mempool_alloc_slab, - mempool_free_slab, pending_cache); + pending_pool = mempool_create_slab_pool(128, pending_cache); if (!pending_pool) { DMERR("Couldn't create pending pool."); r = -ENOMEM; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8c82373f7ff3..a64798ef481e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -823,13 +823,11 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent) md->queue->unplug_fn = dm_unplug_all; md->queue->issue_flush_fn = dm_flush_all; - md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab, - mempool_free_slab, _io_cache); + md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache); if (!md->io_pool) goto bad2; - md->tio_pool = mempool_create(MIN_IOS, mempool_alloc_slab, - mempool_free_slab, _tio_cache); + md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache); if (!md->tio_pool) goto bad3; diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index 0d54e8b7d9de..9dcb2c8a3853 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c @@ -227,8 +227,7 @@ static int jobs_init(void) if (!_job_cache) return -ENOMEM; - _job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab, - mempool_free_slab, _job_cache); + _job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); if (!_job_pool) { kmem_cache_destroy(_job_cache); return -ENOMEM; diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index b09fb6307153..7d4c5497785b 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -1179,10 +1179,9 @@ static int __init i2o_block_init(void) goto exit; } - i2o_blk_req_pool.pool = mempool_create(I2O_BLOCK_REQ_MEMPOOL_SIZE, - mempool_alloc_slab, - mempool_free_slab, - i2o_blk_req_pool.slab); + i2o_blk_req_pool.pool = + mempool_create_slab_pool(I2O_BLOCK_REQ_MEMPOOL_SIZE, + i2o_blk_req_pool.slab); if (!i2o_blk_req_pool.pool) { osm_err("can't init request mempool\n"); rc = -ENOMEM; diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 7b82ff090d42..2068b66822b7 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -3200,8 +3200,8 @@ iscsi_r2tpool_alloc(struct iscsi_session *session) * Data-Out PDU's within R2T-sequence can be quite big; * using mempool */ - ctask->datapool = mempool_create(ISCSI_DTASK_DEFAULT_MAX, - mempool_alloc_slab, mempool_free_slab, taskcache); + ctask->datapool = mempool_create_slab_pool(ISCSI_DTASK_DEFAULT_MAX, + taskcache); if (ctask->datapool == NULL) { kfifo_free(ctask->r2tqueue); iscsi_pool_free(&ctask->r2tpool, (void**)ctask->r2ts); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 029bbf461bb2..017729c59a49 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2154,8 +2154,7 @@ qla2x00_allocate_sp_pool(scsi_qla_host_t *ha) int rval; rval = QLA_SUCCESS; - ha->srb_mempool = mempool_create(SRB_MIN_REQ, mempool_alloc_slab, - mempool_free_slab, srb_cachep); + ha->srb_mempool = mempool_create_slab_pool(SRB_MIN_REQ, srb_cachep); if (ha->srb_mempool == NULL) { qla_printk(KERN_INFO, ha, "Unable to allocate SRB mempool.\n"); rval = QLA_FUNCTION_FAILED; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ede158d08d9d..8f010a314a3d 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1787,9 +1787,8 @@ int __init scsi_init_queue(void) sgp->name); } - sgp->pool = mempool_create(SG_MEMPOOL_SIZE, - mempool_alloc_slab, mempool_free_slab, - sgp->slab); + sgp->pool = mempool_create_slab_pool(SG_MEMPOOL_SIZE, + sgp->slab); if (!sgp->pool) { printk(KERN_ERR "SCSI: can't init sg mempool %s\n", sgp->name); diff --git a/fs/bio.c b/fs/bio.c index 377046d82945..eb8fbc53f2cd 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1141,8 +1141,7 @@ static int biovec_create_pools(struct bio_set *bs, int pool_entries, int scale) if (i >= scale) pool_entries >>= 1; - *bvp = mempool_create(pool_entries, mempool_alloc_slab, - mempool_free_slab, bp->slab); + *bvp = mempool_create_slab_pool(pool_entries, bp->slab); if (!*bvp) return -ENOMEM; } @@ -1179,9 +1178,7 @@ struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size, int scale) if (!bs) return NULL; - bs->bio_pool = mempool_create(bio_pool_size, mempool_alloc_slab, - mempool_free_slab, bio_slab); - + bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bio_slab); if (!bs->bio_pool) goto bad; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 221b3334b737..6b99b51d6694 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -738,10 +738,8 @@ cifs_init_request_bufs(void) cERROR(1,("cifs_min_rcv set to maximum (64)")); } - cifs_req_poolp = mempool_create(cifs_min_rcv, - mempool_alloc_slab, - mempool_free_slab, - cifs_req_cachep); + cifs_req_poolp = mempool_create_slab_pool(cifs_min_rcv, + cifs_req_cachep); if(cifs_req_poolp == NULL) { kmem_cache_destroy(cifs_req_cachep); @@ -771,10 +769,8 @@ cifs_init_request_bufs(void) cFYI(1,("cifs_min_small set to maximum (256)")); } - cifs_sm_req_poolp = mempool_create(cifs_min_small, - mempool_alloc_slab, - mempool_free_slab, - cifs_sm_req_cachep); + cifs_sm_req_poolp = mempool_create_slab_pool(cifs_min_small, + cifs_sm_req_cachep); if(cifs_sm_req_poolp == NULL) { mempool_destroy(cifs_req_poolp); @@ -808,10 +804,8 @@ cifs_init_mids(void) if (cifs_mid_cachep == NULL) return -ENOMEM; - cifs_mid_poolp = mempool_create(3 /* a reasonable min simultan opers */, - mempool_alloc_slab, - mempool_free_slab, - cifs_mid_cachep); + /* 3 is a reasonable minimum number of simultaneous operations */ + cifs_mid_poolp = mempool_create_slab_pool(3, cifs_mid_cachep); if(cifs_mid_poolp == NULL) { kmem_cache_destroy(cifs_mid_cachep); return -ENOMEM; diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 8508043849f3..f28696f235c4 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -220,8 +220,8 @@ int __init metapage_init(void) if (metapage_cache == NULL) return -ENOMEM; - metapage_mempool = mempool_create(METAPOOL_MIN_PAGES, mempool_alloc_slab, - mempool_free_slab, metapage_cache); + metapage_mempool = mempool_create_slab_pool(METAPOOL_MIN_PAGES, + metapage_cache); if (metapage_mempool == NULL) { kmem_cache_destroy(metapage_cache); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 3961524fd4ab..624ca7146b6b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -663,10 +663,8 @@ int nfs_init_readpagecache(void) if (nfs_rdata_cachep == NULL) return -ENOMEM; - nfs_rdata_mempool = mempool_create(MIN_POOL_READ, - mempool_alloc_slab, - mempool_free_slab, - nfs_rdata_cachep); + nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ, + nfs_rdata_cachep); if (nfs_rdata_mempool == NULL) return -ENOMEM; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3f5225404c97..4cfada2cc09f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1521,17 +1521,13 @@ int nfs_init_writepagecache(void) if (nfs_wdata_cachep == NULL) return -ENOMEM; - nfs_wdata_mempool = mempool_create(MIN_POOL_WRITE, - mempool_alloc_slab, - mempool_free_slab, - nfs_wdata_cachep); + nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, + nfs_wdata_cachep); if (nfs_wdata_mempool == NULL) return -ENOMEM; - nfs_commit_mempool = mempool_create(MIN_POOL_COMMIT, - mempool_alloc_slab, - mempool_free_slab, - nfs_wdata_cachep); + nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, + nfs_wdata_cachep); if (nfs_commit_mempool == NULL) return -ENOMEM; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 8355faf8ffde..1884300417e3 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -375,9 +375,8 @@ xfs_init_zones(void) if (!xfs_ioend_zone) goto out_destroy_vnode_zone; - xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE, - mempool_alloc_slab, mempool_free_slab, - xfs_ioend_zone); + xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE, + xfs_ioend_zone); if (!xfs_ioend_pool) goto out_free_ioend_zone; return 0; diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 5a9d8c599171..dd7d627bf66f 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -950,9 +950,7 @@ static inline int i2o_pool_alloc(struct i2o_pool *pool, const char *name, if (!pool->slab) goto free_name; - pool->mempool = - mempool_create(min_nr, mempool_alloc_slab, mempool_free_slab, - pool->slab); + pool->mempool = mempool_create_slab_pool(min_nr, pool->slab); if (!pool->mempool) goto free_slab; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b9969b91a9f7..5c3eee768504 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1167,16 +1167,12 @@ rpc_init_mempool(void) NULL, NULL); if (!rpc_buffer_slabp) goto err_nomem; - rpc_task_mempool = mempool_create(RPC_TASK_POOLSIZE, - mempool_alloc_slab, - mempool_free_slab, - rpc_task_slabp); + rpc_task_mempool = mempool_create_slab_pool(RPC_TASK_POOLSIZE, + rpc_task_slabp); if (!rpc_task_mempool) goto err_nomem; - rpc_buffer_mempool = mempool_create(RPC_BUFFER_POOLSIZE, - mempool_alloc_slab, - mempool_free_slab, - rpc_buffer_slabp); + rpc_buffer_mempool = mempool_create_slab_pool(RPC_BUFFER_POOLSIZE, + rpc_buffer_slabp); if (!rpc_buffer_mempool) goto err_nomem; return 0; -- cgit v1.2.3 From 1ecac7fd74f2e5fb06a7719ecba55fb5778a9a47 Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Mon, 27 Mar 2006 01:17:51 -0800 Subject: [PATCH] dm flush queue EINTR If dm_suspend() is cancelled, bios already added to the deferred list need to be submitted. Otherwise they remain 'in limbo' until there's a dm_resume(). Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/dm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a64798ef481e..48be69f3bcfa 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1107,6 +1107,7 @@ int dm_suspend(struct mapped_device *md, int do_lockfs) { struct dm_table *map = NULL; DECLARE_WAITQUEUE(wait, current); + struct bio *def; int r = -EINVAL; down(&md->suspend_lock); @@ -1166,9 +1167,11 @@ int dm_suspend(struct mapped_device *md, int do_lockfs) /* were we interrupted ? */ r = -EINTR; if (atomic_read(&md->pending)) { + clear_bit(DMF_BLOCK_IO, &md->flags); + def = bio_list_get(&md->deferred); + __flush_deferred_io(md, def); up_write(&md->io_lock); unlock_fs(md); - clear_bit(DMF_BLOCK_IO, &md->flags); goto out; } up_write(&md->io_lock); -- cgit v1.2.3 From 7e51f257e87297a5b6fe6d136a8ef67206aaf3a8 Mon Sep 17 00:00:00 2001 From: Mike Anderson Date: Mon, 27 Mar 2006 01:17:52 -0800 Subject: [PATCH] dm: store md name The patch stores a printable device number in struct mapped_device for use in warning messages and with a proposed netlink interface. Signed-off-by: Mike Anderson Signed-off-by: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/dm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 48be69f3bcfa..f140d499602a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -69,6 +69,7 @@ struct mapped_device { request_queue_t *queue; struct gendisk *disk; + char name[16]; void *interface_ptr; @@ -842,6 +843,7 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent) md->disk->private_data = md; sprintf(md->disk->disk_name, "dm-%d", minor); add_disk(md->disk); + format_dev_t(md->name, MKDEV(_major, minor)); atomic_set(&md->pending, 0); init_waitqueue_head(&md->wait); -- cgit v1.2.3 From 9ade92a9a5b0a3a10efa6551b8c67a9277bf0438 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Mon, 27 Mar 2006 01:17:53 -0800 Subject: [PATCH] dm: tidy mdptr Change dm_get_mdptr() to take a struct mapped_device instead of dev_t. Signed-off-by: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/dm-ioctl.c | 16 +++++++++++++--- drivers/md/dm.c | 10 ++-------- drivers/md/dm.h | 2 +- 3 files changed, 16 insertions(+), 12 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 442e2be6052e..0693b6f54b7d 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -600,12 +600,22 @@ static int dev_create(struct dm_ioctl *param, size_t param_size) */ static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param) { + struct mapped_device *md; + void *mdptr = NULL; + if (*param->uuid) return __get_uuid_cell(param->uuid); - else if (*param->name) + + if (*param->name) return __get_name_cell(param->name); - else - return dm_get_mdptr(huge_decode_dev(param->dev)); + + md = dm_get_md(huge_decode_dev(param->dev)); + if (md) { + mdptr = dm_get_mdptr(md); + dm_put(md); + } + + return mdptr; } static struct mapped_device *find_device(struct dm_ioctl *param) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f140d499602a..3d121cbc2fde 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -990,15 +990,9 @@ struct mapped_device *dm_get_md(dev_t dev) return md; } -void *dm_get_mdptr(dev_t dev) +void *dm_get_mdptr(struct mapped_device *md) { - struct mapped_device *md; - void *mdptr = NULL; - - md = dm_find_md(dev); - if (md) - mdptr = md->interface_ptr; - return mdptr; + return md->interface_ptr; } void dm_set_mdptr(struct mapped_device *md, void *ptr) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 0ff11d6f8158..17ffa8d671a7 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -47,7 +47,7 @@ struct mapped_device; int dm_create(struct mapped_device **md); int dm_create_with_minor(unsigned int minor, struct mapped_device **md); void dm_set_mdptr(struct mapped_device *md, void *ptr); -void *dm_get_mdptr(dev_t dev); +void *dm_get_mdptr(struct mapped_device *md); struct mapped_device *dm_get_md(dev_t dev); /* -- cgit v1.2.3 From 1134e5ae79bab61c05657ca35a6297cf87202e35 Mon Sep 17 00:00:00 2001 From: Mike Anderson Date: Mon, 27 Mar 2006 01:17:54 -0800 Subject: [PATCH] dm table: store md Store an up-pointer to the owning struct mapped_device in every table when it is created. Access it with: struct mapped_device *dm_table_get_md(struct dm_table *t) Tables linked to md must be destroyed before the md itself. Signed-off-by: Mike Anderson Signed-off-by: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/dm-ioctl.c | 32 +++++++++++++++++++++----------- drivers/md/dm-table.c | 13 ++++++++++++- drivers/md/dm.c | 6 +++--- drivers/md/dm.h | 4 +++- 4 files changed, 39 insertions(+), 16 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 0693b6f54b7d..65826bdac00c 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -244,9 +244,9 @@ static void __hash_remove(struct hash_cell *hc) dm_table_put(table); } - dm_put(hc->md); if (hc->new_map) dm_table_put(hc->new_map); + dm_put(hc->md); free_cell(hc); } @@ -985,33 +985,43 @@ static int table_load(struct dm_ioctl *param, size_t param_size) int r; struct hash_cell *hc; struct dm_table *t; + struct mapped_device *md; + + md = find_device(param); + if (!md) + return -ENXIO; - r = dm_table_create(&t, get_mode(param), param->target_count); + r = dm_table_create(&t, get_mode(param), param->target_count, md); if (r) - return r; + goto out; r = populate_table(t, param, param_size); if (r) { dm_table_put(t); - return r; + goto out; } down_write(&_hash_lock); - hc = __find_device_hash_cell(param); - if (!hc) { - DMWARN("device doesn't appear to be in the dev hash table."); - up_write(&_hash_lock); + hc = dm_get_mdptr(md); + if (!hc || hc->md != md) { + DMWARN("device has been removed from the dev hash table."); dm_table_put(t); - return -ENXIO; + up_write(&_hash_lock); + r = -ENXIO; + goto out; } if (hc->new_map) dm_table_put(hc->new_map); hc->new_map = t; + up_write(&_hash_lock); + param->flags |= DM_INACTIVE_PRESENT_FLAG; + r = __dev_status(md, param); + +out: + dm_put(md); - r = __dev_status(hc->md, param); - up_write(&_hash_lock); return r; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 9558a4acec12..84f4594afd3c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -22,6 +22,7 @@ #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) struct dm_table { + struct mapped_device *md; atomic_t holders; /* btree table */ @@ -206,7 +207,8 @@ static int alloc_targets(struct dm_table *t, unsigned int num) return 0; } -int dm_table_create(struct dm_table **result, int mode, unsigned num_targets) +int dm_table_create(struct dm_table **result, int mode, + unsigned num_targets, struct mapped_device *md) { struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL); @@ -229,6 +231,7 @@ int dm_table_create(struct dm_table **result, int mode, unsigned num_targets) } t->mode = mode; + t->md = md; *result = t; return 0; } @@ -952,12 +955,20 @@ int dm_table_flush_all(struct dm_table *t) return ret; } +struct mapped_device *dm_table_get_md(struct dm_table *t) +{ + dm_get(t->md); + + return t->md; +} + EXPORT_SYMBOL(dm_vcalloc); EXPORT_SYMBOL(dm_get_device); EXPORT_SYMBOL(dm_put_device); EXPORT_SYMBOL(dm_table_event); EXPORT_SYMBOL(dm_table_get_size); EXPORT_SYMBOL(dm_table_get_mode); +EXPORT_SYMBOL(dm_table_get_md); EXPORT_SYMBOL(dm_table_put); EXPORT_SYMBOL(dm_table_get); EXPORT_SYMBOL(dm_table_unplug_all); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3d121cbc2fde..b99df48cffed 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1007,18 +1007,18 @@ void dm_get(struct mapped_device *md) void dm_put(struct mapped_device *md) { - struct dm_table *map = dm_get_table(md); + struct dm_table *map; if (atomic_dec_and_test(&md->holders)) { + map = dm_get_table(md); if (!dm_suspended(md)) { dm_table_presuspend_targets(map); dm_table_postsuspend_targets(map); } __unbind(md); + dm_table_put(map); free_dev(md); } - - dm_table_put(map); } /* diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 17ffa8d671a7..79e800051a10 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -89,7 +89,8 @@ int dm_suspended(struct mapped_device *md); * Functions for manipulating a table. Tables are also reference * counted. *---------------------------------------------------------------*/ -int dm_table_create(struct dm_table **result, int mode, unsigned num_targets); +int dm_table_create(struct dm_table **result, int mode, + unsigned num_targets, struct mapped_device *md); void dm_table_get(struct dm_table *t); void dm_table_put(struct dm_table *t); @@ -107,6 +108,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q); unsigned int dm_table_get_num_targets(struct dm_table *t); struct list_head *dm_table_get_devices(struct dm_table *t); int dm_table_get_mode(struct dm_table *t); +struct mapped_device *dm_table_get_md(struct dm_table *t); void dm_table_presuspend_targets(struct dm_table *t); void dm_table_postsuspend_targets(struct dm_table *t); void dm_table_resume_targets(struct dm_table *t); -- cgit v1.2.3 From 3ac51e741a46af7a20f55e79d3e3aeaa93c6c544 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 27 Mar 2006 01:17:54 -0800 Subject: [PATCH] dm store geometry Allow drive geometry to be stored with a new DM_DEV_SET_GEOMETRY ioctl. Device-mapper will now respond to HDIO_GETGEO. If the geometry information is not available, zero will be returned for all of the parameters. Signed-off-by: Darrick J. Wong Signed-off-by: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/dm-ioctl.c | 52 +++++++++++++++++++++++++++++++++++++++++++- drivers/md/dm.c | 46 +++++++++++++++++++++++++++++++++++++++ drivers/md/dm.h | 7 ++++++ include/linux/compat_ioctl.h | 2 ++ include/linux/dm-ioctl.h | 17 +++++++++++++-- 5 files changed, 121 insertions(+), 3 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 65826bdac00c..8edd6435414d 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -700,6 +701,54 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size) return dm_hash_rename(param->name, new_name); } +static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) +{ + int r = -EINVAL, x; + struct mapped_device *md; + struct hd_geometry geometry; + unsigned long indata[4]; + char *geostr = (char *) param + param->data_start; + + md = find_device(param); + if (!md) + return -ENXIO; + + if (geostr < (char *) (param + 1) || + invalid_str(geostr, (void *) param + param_size)) { + DMWARN("Invalid geometry supplied."); + goto out; + } + + x = sscanf(geostr, "%lu %lu %lu %lu", indata, + indata + 1, indata + 2, indata + 3); + + if (x != 4) { + DMWARN("Unable to interpret geometry settings."); + goto out; + } + + if (indata[0] > 65535 || indata[1] > 255 || + indata[2] > 255 || indata[3] > ULONG_MAX) { + DMWARN("Geometry exceeds range limits."); + goto out; + } + + geometry.cylinders = indata[0]; + geometry.heads = indata[1]; + geometry.sectors = indata[2]; + geometry.start = indata[3]; + + r = dm_set_geometry(md, &geometry); + if (!r) + r = __dev_status(md, param); + + param->data_size = 0; + +out: + dm_put(md); + return r; +} + static int do_suspend(struct dm_ioctl *param) { int r = 0; @@ -1234,7 +1283,8 @@ static ioctl_fn lookup_ioctl(unsigned int cmd) {DM_LIST_VERSIONS_CMD, list_versions}, - {DM_TARGET_MSG_CMD, target_message} + {DM_TARGET_MSG_CMD, target_message}, + {DM_DEV_SET_GEOMETRY_CMD, dev_set_geometry} }; return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b99df48cffed..973e63d530ae 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -17,6 +17,7 @@ #include #include #include +#include #include static const char *_name = DM_NAME; @@ -102,6 +103,9 @@ struct mapped_device { */ struct super_block *frozen_sb; struct block_device *suspended_bdev; + + /* forced geometry settings */ + struct hd_geometry geometry; }; #define MIN_IOS 256 @@ -227,6 +231,13 @@ static int dm_blk_close(struct inode *inode, struct file *file) return 0; } +static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct mapped_device *md = bdev->bd_disk->private_data; + + return dm_get_geometry(md, geo); +} + static inline struct dm_io *alloc_io(struct mapped_device *md) { return mempool_alloc(md->io_pool, GFP_NOIO); @@ -313,6 +324,33 @@ struct dm_table *dm_get_table(struct mapped_device *md) return t; } +/* + * Get the geometry associated with a dm device + */ +int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo) +{ + *geo = md->geometry; + + return 0; +} + +/* + * Set the geometry of a device. + */ +int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo) +{ + sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors; + + if (geo->start > sz) { + DMWARN("Start sector is beyond the geometry limits."); + return -EINVAL; + } + + md->geometry = *geo; + + return 0; +} + /*----------------------------------------------------------------- * CRUD START: * A more elegant soln is in the works that uses the queue @@ -906,6 +944,13 @@ static int __bind(struct mapped_device *md, struct dm_table *t) sector_t size; size = dm_table_get_size(t); + + /* + * Wipe any geometry if the size of the table changed. + */ + if (size != get_capacity(md->disk)) + memset(&md->geometry, 0, sizeof(md->geometry)); + __set_size(md, size); if (size == 0) return 0; @@ -1261,6 +1306,7 @@ int dm_suspended(struct mapped_device *md) static struct block_device_operations dm_blk_dops = { .open = dm_blk_open, .release = dm_blk_close, + .getgeo = dm_blk_getgeo, .owner = THIS_MODULE }; diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 79e800051a10..fd90bc8f9e45 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -14,6 +14,7 @@ #include #include #include +#include #define DM_NAME "device-mapper" #define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x) @@ -85,6 +86,12 @@ int dm_wait_event(struct mapped_device *md, int event_nr); struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct mapped_device *md); +/* + * Geometry functions. + */ +int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo); +int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo); + /*----------------------------------------------------------------- * Functions for manipulating a table. Tables are also reference * counted. diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index efb518f16bb3..89ab677cb993 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -140,6 +140,7 @@ COMPATIBLE_IOCTL(DM_TABLE_DEPS_32) COMPATIBLE_IOCTL(DM_TABLE_STATUS_32) COMPATIBLE_IOCTL(DM_LIST_VERSIONS_32) COMPATIBLE_IOCTL(DM_TARGET_MSG_32) +COMPATIBLE_IOCTL(DM_DEV_SET_GEOMETRY_32) COMPATIBLE_IOCTL(DM_VERSION) COMPATIBLE_IOCTL(DM_REMOVE_ALL) COMPATIBLE_IOCTL(DM_LIST_DEVICES) @@ -155,6 +156,7 @@ COMPATIBLE_IOCTL(DM_TABLE_DEPS) COMPATIBLE_IOCTL(DM_TABLE_STATUS) COMPATIBLE_IOCTL(DM_LIST_VERSIONS) COMPATIBLE_IOCTL(DM_TARGET_MSG) +COMPATIBLE_IOCTL(DM_DEV_SET_GEOMETRY) /* Big K */ COMPATIBLE_IOCTL(PIO_FONT) COMPATIBLE_IOCTL(GIO_FONT) diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index fa75ba0d635e..c67c6786612a 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -80,6 +80,16 @@ * * DM_TARGET_MSG: * Pass a message string to the target at a specific offset of a device. + * + * DM_DEV_SET_GEOMETRY: + * Set the geometry of a device by passing in a string in this format: + * + * "cylinders heads sectors_per_track start_sector" + * + * Beware that CHS geometry is nearly obsolete and only provided + * for compatibility with dm devices that can be booted by a PC + * BIOS. See struct hd_geometry for range limits. Also note that + * the geometry is erased if the device size changes. */ /* @@ -218,6 +228,7 @@ enum { /* Added later */ DM_LIST_VERSIONS_CMD, DM_TARGET_MSG_CMD, + DM_DEV_SET_GEOMETRY_CMD }; /* @@ -247,6 +258,7 @@ typedef char ioctl_struct[308]; #define DM_TABLE_STATUS_32 _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, ioctl_struct) #define DM_LIST_VERSIONS_32 _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, ioctl_struct) #define DM_TARGET_MSG_32 _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, ioctl_struct) +#define DM_DEV_SET_GEOMETRY_32 _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, ioctl_struct) #endif #define DM_IOCTL 0xfd @@ -270,11 +282,12 @@ typedef char ioctl_struct[308]; #define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl) #define DM_TARGET_MSG _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl) +#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 5 +#define DM_VERSION_MINOR 6 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2005-10-04)" +#define DM_VERSION_EXTRA "-ioctl (2006-02-17)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From 48c9c27b8bcd2a328a06151e2d5c1170db0b701b Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 27 Mar 2006 01:18:20 -0800 Subject: [PATCH] sem2mutex: drivers/md Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/dm-table.c | 11 ++++++----- drivers/md/dm.c | 19 ++++++++++--------- drivers/md/kcopyd.c | 11 ++++++----- drivers/md/md.c | 11 ++++++----- 4 files changed, 28 insertions(+), 24 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 76610a6ac01c..8f56a54cf0ce 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #define MAX_DEPTH 16 @@ -770,14 +771,14 @@ int dm_table_complete(struct dm_table *t) return r; } -static DECLARE_MUTEX(_event_lock); +static DEFINE_MUTEX(_event_lock); void dm_table_event_callback(struct dm_table *t, void (*fn)(void *), void *context) { - down(&_event_lock); + mutex_lock(&_event_lock); t->event_fn = fn; t->event_context = context; - up(&_event_lock); + mutex_unlock(&_event_lock); } void dm_table_event(struct dm_table *t) @@ -788,10 +789,10 @@ void dm_table_event(struct dm_table *t) */ BUG_ON(in_interrupt()); - down(&_event_lock); + mutex_lock(&_event_lock); if (t->event_fn) t->event_fn(t->event_context); - up(&_event_lock); + mutex_unlock(&_event_lock); } sector_t dm_table_get_size(struct dm_table *t) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 973e63d530ae..4d710b7a133b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -743,14 +744,14 @@ static int dm_any_congested(void *congested_data, int bdi_bits) /*----------------------------------------------------------------- * An IDR is used to keep track of allocated minor numbers. *---------------------------------------------------------------*/ -static DECLARE_MUTEX(_minor_lock); +static DEFINE_MUTEX(_minor_lock); static DEFINE_IDR(_minor_idr); static void free_minor(unsigned int minor) { - down(&_minor_lock); + mutex_lock(&_minor_lock); idr_remove(&_minor_idr, minor); - up(&_minor_lock); + mutex_unlock(&_minor_lock); } /* @@ -763,7 +764,7 @@ static int specific_minor(struct mapped_device *md, unsigned int minor) if (minor >= (1 << MINORBITS)) return -EINVAL; - down(&_minor_lock); + mutex_lock(&_minor_lock); if (idr_find(&_minor_idr, minor)) { r = -EBUSY; @@ -788,7 +789,7 @@ static int specific_minor(struct mapped_device *md, unsigned int minor) } out: - up(&_minor_lock); + mutex_unlock(&_minor_lock); return r; } @@ -797,7 +798,7 @@ static int next_free_minor(struct mapped_device *md, unsigned int *minor) int r; unsigned int m; - down(&_minor_lock); + mutex_lock(&_minor_lock); r = idr_pre_get(&_minor_idr, GFP_KERNEL); if (!r) { @@ -819,7 +820,7 @@ static int next_free_minor(struct mapped_device *md, unsigned int *minor) *minor = m; out: - up(&_minor_lock); + mutex_unlock(&_minor_lock); return r; } @@ -1014,13 +1015,13 @@ static struct mapped_device *dm_find_md(dev_t dev) if (MAJOR(dev) != _major || minor >= (1 << MINORBITS)) return NULL; - down(&_minor_lock); + mutex_lock(&_minor_lock); md = idr_find(&_minor_idr, minor); if (!md || (dm_disk(md)->first_minor != minor)) md = NULL; - up(&_minor_lock); + mutex_unlock(&_minor_lock); return md; } diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index ed71f3f94620..72480a48d88b 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "kcopyd.h" @@ -581,21 +582,21 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) /*----------------------------------------------------------------- * Unit setup *---------------------------------------------------------------*/ -static DECLARE_MUTEX(_client_lock); +static DEFINE_MUTEX(_client_lock); static LIST_HEAD(_clients); static void client_add(struct kcopyd_client *kc) { - down(&_client_lock); + mutex_lock(&_client_lock); list_add(&kc->list, &_clients); - up(&_client_lock); + mutex_unlock(&_client_lock); } static void client_del(struct kcopyd_client *kc) { - down(&_client_lock); + mutex_lock(&_client_lock); list_del(&kc->list); - up(&_client_lock); + mutex_unlock(&_client_lock); } static DEFINE_MUTEX(kcopyd_init_lock); diff --git a/drivers/md/md.c b/drivers/md/md.c index 147efcb1e8ca..c9c9c096ad80 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -43,6 +43,7 @@ #include /* for invalidate_bdev */ #include #include +#include #include @@ -2500,7 +2501,7 @@ int mdp_major = 0; static struct kobject *md_probe(dev_t dev, int *part, void *data) { - static DECLARE_MUTEX(disks_sem); + static DEFINE_MUTEX(disks_mutex); mddev_t *mddev = mddev_find(dev); struct gendisk *disk; int partitioned = (MAJOR(dev) != MD_MAJOR); @@ -2510,15 +2511,15 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) if (!mddev) return NULL; - down(&disks_sem); + mutex_lock(&disks_mutex); if (mddev->gendisk) { - up(&disks_sem); + mutex_unlock(&disks_mutex); mddev_put(mddev); return NULL; } disk = alloc_disk(1 << shift); if (!disk) { - up(&disks_sem); + mutex_unlock(&disks_mutex); mddev_put(mddev); return NULL; } @@ -2536,7 +2537,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) disk->queue = mddev->queue; add_disk(disk); mddev->gendisk = disk; - up(&disks_sem); + mutex_unlock(&disks_mutex); mddev->kobj.parent = &disk->kobj; mddev->kobj.k_name = NULL; snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md"); -- cgit v1.2.3